diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index b347c443da677f..b00a4b984a1d23 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -48,7 +48,6 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests" \ -D LLVM_ENABLE_LLD=ON \ -D CMAKE_CXX_FLAGS=-gmlt \ - -D BOLT_CLANG_EXE=/usr/bin/clang \ -D LLVM_CCACHE_BUILD=ON \ -D MLIR_ENABLE_BINDINGS_PYTHON=ON diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index 9cf64417d3cb2c..d608ea449f1d40 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,3 +1,6 @@ +BOLT: + - bolt/**/* + ClangIR: - clang/include/clang/CIR/**/* - clang/lib/CIR/**/* @@ -467,6 +470,7 @@ backend:m68k: libc++: - libcxx/** + - .github/workflows/libcxx-* libc++abi: - libcxxabi/** diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h index 8d89ef8b5484f8..5d7692559eda88 100644 --- a/bolt/include/bolt/Passes/BinaryPasses.h +++ b/bolt/include/bolt/Passes/BinaryPasses.h @@ -400,8 +400,7 @@ class PrintProfileStats : public BinaryFunctionPass { /// dyno stats categories. 
class PrintProgramStats : public BinaryFunctionPass { public: - explicit PrintProgramStats(const cl::opt &PrintPass) - : BinaryFunctionPass(PrintPass) {} + explicit PrintProgramStats() : BinaryFunctionPass(false) {} const char *getName() const override { return "print-stats"; } bool shouldPrint(const BinaryFunction &) const override { return false; } diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 0b2a4e86561f3a..70e324cc0165bb 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -14,6 +14,7 @@ #include "bolt/Profile/DataAggregator.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryFunction.h" +#include "bolt/Passes/BinaryPasses.h" #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/Heatmap.h" #include "bolt/Profile/YAMLProfileWriter.h" @@ -611,6 +612,7 @@ Error DataAggregator::readProfile(BinaryContext &BC) { if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile)) report_error("cannot create output data file", EC); } + BC.logBOLTErrorsAndQuitOnFatal(PrintProgramStats().runOnFunctions(BC)); } return Error::success(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index be4888ccfa5645..cbb7199a53ddd1 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -356,7 +356,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { // order they're registered. // Run this pass first to use stats for the original functions. 
- Manager.registerPass(std::make_unique(NeverPrint)); + Manager.registerPass(std::make_unique()); if (opts::PrintProfileStats) Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp index fa43b7a2f92c23..74b5ca18abce42 100644 --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -292,7 +292,7 @@ class RewriteInstanceDiff { } } } - PrintProgramStats PPS(opts::NeverPrint); + PrintProgramStats PPS; outs() << "* BOLT-DIFF: Starting print program stats pass for binary 1\n"; RI1.BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*RI1.BC)); outs() << "* BOLT-DIFF: Starting print program stats pass for binary 2\n"; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a6b2f3cc0850c3..3cf0e749f9d667 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1725,12 +1725,6 @@ void RewriteInstance::adjustFunctionBoundaries() { if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) break; - // Ignore unnamed symbols. Used, for example, by debugging info on RISC-V. - if (BC->isRISCV() && cantFail(Symbol.getName()).empty()) { - ++NextSymRefI; - continue; - } - // Skip basic block labels. This happens on RISC-V with linker relaxation // enabled because every branch needs a relocation and corresponding // symbol. We don't want to add such symbols as entry points. diff --git a/bolt/test/RISCV/unnamed-sym-no-entry.c b/bolt/test/RISCV/fake-label-no-entry.c similarity index 88% rename from bolt/test/RISCV/unnamed-sym-no-entry.c rename to bolt/test/RISCV/fake-label-no-entry.c index b4173506b213ce..bd125263101bb4 100644 --- a/bolt/test/RISCV/unnamed-sym-no-entry.c +++ b/bolt/test/RISCV/fake-label-no-entry.c @@ -5,12 +5,12 @@ // RUN: %clang %cflags -g -Wl,-q -o %t %s -/// Verify that the binary indeed contains an unnamed symbol at _start +/// Verify that the binary indeed contains a fake label ".L0 " at _start. 
// RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=CHECK-ELF // CHECK-ELF-DAG: [[#%x,START:]] {{.*}} FUNC GLOBAL DEFAULT [[#%d,SECTION:]] _start{{$}} // CHECK-ELF-DAG: [[#%x,START]] {{.*}} NOTYPE LOCAL DEFAULT [[#SECTION]] .L0 {{$}} -/// Verify that BOLT did not create an extra entry point for the unnamed symbol +/// Verify that BOLT did not create an extra entry point for the fake label. // RUN: llvm-bolt -o %t.bolt %t --print-cfg | FileCheck %s // CHECK: Binary Function "_start" after building cfg { // CHECK: IsMultiEntry: 0 diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index e8c3f64239a27d..0bd44720f1b7a1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -11,7 +11,14 @@ REQUIRES: system-linux RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ -RUN: --profile-use-dfs +RUN: --profile-use-dfs | FileCheck %s + +RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s + +CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile + RUN: cat %t | sort | FileCheck %s -check-prefix=PERF2BOLT RUN: cat %t.new | FileCheck %s -check-prefix=NEWFORMAT diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index 6852db6c2ee311..8005d6e91c060c 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -16,6 +16,7 @@ add_clang_library(clangTidyModernizeModule MakeSharedCheck.cpp MakeSmartPtrCheck.cpp MakeUniqueCheck.cpp + MinMaxUseInitializerListCheck.cpp ModernizeTidyModule.cpp PassByValueCheck.cpp RawStringLiteralCheck.cpp diff --git 
a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp new file mode 100644 index 00000000000000..45f7700463d570 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp @@ -0,0 +1,271 @@ +//===--- MinMaxUseInitializerListCheck.cpp - clang-tidy -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MinMaxUseInitializerListCheck.h" +#include "../utils/ASTUtils.h" +#include "../utils/LexerUtils.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Lex/Lexer.h" + +using namespace clang; + +namespace { + +struct FindArgsResult { + const Expr *First; + const Expr *Last; + const Expr *Compare; + SmallVector Args; +}; + +} // anonymous namespace + +using namespace clang::ast_matchers; + +namespace clang::tidy::modernize { + +static FindArgsResult findArgs(const CallExpr *Call) { + FindArgsResult Result; + Result.First = nullptr; + Result.Last = nullptr; + Result.Compare = nullptr; + + // check if the function has initializer list argument + if (Call->getNumArgs() < 3) { + auto ArgIterator = Call->arguments().begin(); + + const auto *InitListExpr = + dyn_cast(*ArgIterator); + const auto *InitList = + InitListExpr != nullptr + ? 
dyn_cast( + InitListExpr->getSubExpr()->IgnoreImplicit()) + : nullptr; + + if (InitList) { + Result.Args.append(InitList->inits().begin(), InitList->inits().end()); + Result.First = *ArgIterator; + Result.Last = *ArgIterator; + + // check if there is a comparison argument + std::advance(ArgIterator, 1); + if (ArgIterator != Call->arguments().end()) + Result.Compare = *ArgIterator; + + return Result; + } + Result.Args = SmallVector(Call->arguments()); + } else { + // if it has 3 arguments then the last will be the comparison + Result.Compare = *(std::next(Call->arguments().begin(), 2)); + Result.Args = SmallVector(llvm::drop_end(Call->arguments())); + } + Result.First = Result.Args.front(); + Result.Last = Result.Args.back(); + + return Result; +} + +static SmallVector +generateReplacements(const MatchFinder::MatchResult &Match, + const CallExpr *TopCall, const FindArgsResult &Result, + const bool IgnoreNonTrivialTypes, + const std::uint64_t IgnoreTrivialTypesOfSizeAbove) { + SmallVector FixItHints; + const SourceManager &SourceMngr = *Match.SourceManager; + const LangOptions &LanguageOpts = Match.Context->getLangOpts(); + + const QualType ResultType = TopCall->getDirectCallee() + ->getReturnType() + .getCanonicalType() + .getNonReferenceType() + .getUnqualifiedType(); + + // check if the type is trivial + const bool IsResultTypeTrivial = ResultType.isTrivialType(*Match.Context); + + if ((!IsResultTypeTrivial && IgnoreNonTrivialTypes)) + return FixItHints; + + if (IsResultTypeTrivial && + static_cast( + Match.Context->getTypeSizeInChars(ResultType).getQuantity()) > + IgnoreTrivialTypesOfSizeAbove) + return FixItHints; + + for (const Expr *Arg : Result.Args) { + const auto *InnerCall = dyn_cast(Arg->IgnoreParenImpCasts()); + + // If the argument is not a nested call + if (!InnerCall) { + // check if typecast is required + const QualType ArgType = Arg->IgnoreParenImpCasts() + ->getType() + .getCanonicalType() + .getUnqualifiedType(); + + if (ArgType == ResultType) + 
continue; + + const StringRef ArgText = Lexer::getSourceText( + CharSourceRange::getTokenRange(Arg->getSourceRange()), SourceMngr, + LanguageOpts); + + const auto Replacement = Twine("static_cast<") + .concat(ResultType.getAsString(LanguageOpts)) + .concat(">(") + .concat(ArgText) + .concat(")") + .str(); + + FixItHints.push_back( + FixItHint::CreateReplacement(Arg->getSourceRange(), Replacement)); + continue; + } + + const FindArgsResult InnerResult = findArgs(InnerCall); + + // if the nested call doesn't have arguments skip it + if (!InnerResult.First || !InnerResult.Last) + continue; + + // if the nested call is not the same as the top call + if (InnerCall->getDirectCallee()->getQualifiedNameAsString() != + TopCall->getDirectCallee()->getQualifiedNameAsString()) + continue; + + // if the nested call doesn't have the same compare function + if ((Result.Compare || InnerResult.Compare) && + !utils::areStatementsIdentical(Result.Compare, InnerResult.Compare, + *Match.Context)) + continue; + + // remove the function call + FixItHints.push_back( + FixItHint::CreateRemoval(InnerCall->getCallee()->getSourceRange())); + + // remove the parentheses + const auto LParen = utils::lexer::findNextTokenSkippingComments( + InnerCall->getCallee()->getEndLoc(), SourceMngr, LanguageOpts); + if (LParen.has_value() && LParen->is(tok::l_paren)) + FixItHints.push_back( + FixItHint::CreateRemoval(SourceRange(LParen->getLocation()))); + FixItHints.push_back( + FixItHint::CreateRemoval(SourceRange(InnerCall->getRParenLoc()))); + + // if the inner call has an initializer list arg + if (InnerResult.First == InnerResult.Last) { + // remove the initializer list braces + FixItHints.push_back(FixItHint::CreateRemoval( + CharSourceRange::getTokenRange(InnerResult.First->getBeginLoc()))); + FixItHints.push_back(FixItHint::CreateRemoval( + CharSourceRange::getTokenRange(InnerResult.First->getEndLoc()))); + } + + const SmallVector InnerReplacements = generateReplacements( + Match, InnerCall, 
InnerResult, IgnoreNonTrivialTypes, + IgnoreTrivialTypesOfSizeAbove); + + FixItHints.append(InnerReplacements); + + if (InnerResult.Compare) { + // find the comma after the value arguments + const auto Comma = utils::lexer::findNextTokenSkippingComments( + InnerResult.Last->getEndLoc(), SourceMngr, LanguageOpts); + + // remove the comma and the comparison + if (Comma.has_value() && Comma->is(tok::comma)) + FixItHints.push_back( + FixItHint::CreateRemoval(SourceRange(Comma->getLocation()))); + + FixItHints.push_back( + FixItHint::CreateRemoval(InnerResult.Compare->getSourceRange())); + } + } + + return FixItHints; +} + +MinMaxUseInitializerListCheck::MinMaxUseInitializerListCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + IgnoreNonTrivialTypes(Options.get("IgnoreNonTrivialTypes", true)), + IgnoreTrivialTypesOfSizeAbove( + Options.get("IgnoreTrivialTypesOfSizeAbove", 32L)), + Inserter(Options.getLocalOrGlobal("IncludeStyle", + utils::IncludeSorter::IS_LLVM), + areDiagsSelfContained()) {} + +void MinMaxUseInitializerListCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "IgnoreNonTrivialTypes", IgnoreNonTrivialTypes); + Options.store(Opts, "IgnoreTrivialTypesOfSizeAbove", + IgnoreTrivialTypesOfSizeAbove); + Options.store(Opts, "IncludeStyle", Inserter.getStyle()); +} + +void MinMaxUseInitializerListCheck::registerMatchers(MatchFinder *Finder) { + auto CreateMatcher = [](const StringRef FunctionName) { + auto FuncDecl = functionDecl(hasName(FunctionName)); + auto Expression = callExpr(callee(FuncDecl)); + + return callExpr(callee(FuncDecl), + anyOf(hasArgument(0, Expression), + hasArgument(1, Expression), + hasArgument(0, cxxStdInitializerListExpr())), + unless(hasParent(Expression))) + .bind("topCall"); + }; + + Finder->addMatcher(CreateMatcher("::std::max"), this); + Finder->addMatcher(CreateMatcher("::std::min"), this); +} + +void MinMaxUseInitializerListCheck::registerPPCallbacks( + const 
SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { + Inserter.registerPreprocessor(PP); +} + +void MinMaxUseInitializerListCheck::check( + const MatchFinder::MatchResult &Match) { + + const auto *TopCall = Match.Nodes.getNodeAs("topCall"); + + const FindArgsResult Result = findArgs(TopCall); + const SmallVector Replacements = + generateReplacements(Match, TopCall, Result, IgnoreNonTrivialTypes, + IgnoreTrivialTypesOfSizeAbove); + + if (Replacements.empty()) + return; + + const DiagnosticBuilder Diagnostic = + diag(TopCall->getBeginLoc(), + "do not use nested 'std::%0' calls, use an initializer list instead") + << TopCall->getDirectCallee()->getName() + << Inserter.createIncludeInsertion( + Match.SourceManager->getFileID(TopCall->getBeginLoc()), + ""); + + // if the top call doesn't have an initializer list argument + if (Result.First != Result.Last) { + // add { and } insertions + Diagnostic << FixItHint::CreateInsertion(Result.First->getBeginLoc(), "{"); + + Diagnostic << FixItHint::CreateInsertion( + Lexer::getLocForEndOfToken(Result.Last->getEndLoc(), 0, + *Match.SourceManager, + Match.Context->getLangOpts()), + "}"); + } + + Diagnostic << Replacements; +} + +} // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h new file mode 100644 index 00000000000000..577d1265307612 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h @@ -0,0 +1,56 @@ +//===--- MinMaxUseInitializerListCheck.h - clang-tidy -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MINMAXUSEINITIALIZERLISTCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MINMAXUSEINITIALIZERLISTCHECK_H + +#include "../ClangTidyCheck.h" +#include "../utils/IncludeInserter.h" + +namespace clang::tidy::modernize { + +/// Replaces nested ``std::min`` and ``std::max`` calls with an initializer list +/// where applicable. +/// +/// For example: +/// +/// \code +/// int a = std::max(std::max(i, j), k); +/// \endcode +/// +/// This code is transformed to: +/// +/// \code +/// int a = std::max({i, j, k}); +/// \endcode +class MinMaxUseInitializerListCheck : public ClangTidyCheck { +public: + MinMaxUseInitializerListCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, + Preprocessor *ModuleExpanderPP) override; + void check(const ast_matchers::MatchFinder::MatchResult &Match) override; + + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus11; + } + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } + +private: + bool IgnoreNonTrivialTypes; + std::uint64_t IgnoreTrivialTypesOfSizeAbove; + utils::IncludeInserter Inserter; +}; + +} // namespace clang::tidy::modernize + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MINMAXUSEINITIALIZERLISTCHECK_H diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index e96cf274f58cfe..776558433c5baa 100644 --- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ 
b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -18,6 +18,7 @@ #include "MacroToEnumCheck.h" #include "MakeSharedCheck.h" #include "MakeUniqueCheck.h" +#include "MinMaxUseInitializerListCheck.h" #include "PassByValueCheck.h" #include "RawStringLiteralCheck.h" #include "RedundantVoidArgCheck.h" @@ -68,6 +69,8 @@ class ModernizeModule : public ClangTidyModule { CheckFactories.registerCheck("modernize-macro-to-enum"); CheckFactories.registerCheck("modernize-make-shared"); CheckFactories.registerCheck("modernize-make-unique"); + CheckFactories.registerCheck( + "modernize-min-max-use-initializer-list"); CheckFactories.registerCheck("modernize-pass-by-value"); CheckFactories.registerCheck( "modernize-use-designated-initializers"); diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index dd772d69202548..41065fc8e87859 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -28,6 +28,7 @@ add_clang_library(clangTidyReadabilityModule IsolateDeclarationCheck.cpp MagicNumbersCheck.cpp MakeMemberFunctionConstCheck.cpp + MathMissingParenthesesCheck.cpp MisleadingIndentationCheck.cpp MisplacedArrayIndexCheck.cpp NamedParameterCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp new file mode 100644 index 00000000000000..d1e20b9074cec1 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp @@ -0,0 +1,97 @@ +//===--- MathMissingParenthesesCheck.cpp - clang-tidy ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MathMissingParenthesesCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::readability { + +void MathMissingParenthesesCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(binaryOperator(unless(hasParent(binaryOperator())), + unless(isAssignmentOperator()), + unless(isComparisonOperator()), + unless(hasAnyOperatorName("&&", "||")), + hasDescendant(binaryOperator())) + .bind("binOp"), + this); +} + +static int getPrecedence(const BinaryOperator *BinOp) { + if (!BinOp) + return 0; + switch (BinOp->getOpcode()) { + case BO_Mul: + case BO_Div: + case BO_Rem: + return 5; + case BO_Add: + case BO_Sub: + return 4; + case BO_And: + return 3; + case BO_Xor: + return 2; + case BO_Or: + return 1; + default: + return 0; + } +} +static void addParantheses(const BinaryOperator *BinOp, + const BinaryOperator *ParentBinOp, + ClangTidyCheck *Check, + const clang::SourceManager &SM, + const clang::LangOptions &LangOpts) { + if (!BinOp) + return; + + int Precedence1 = getPrecedence(BinOp); + int Precedence2 = getPrecedence(ParentBinOp); + + if (ParentBinOp != nullptr && Precedence1 != Precedence2) { + const clang::SourceLocation StartLoc = BinOp->getBeginLoc(); + const clang::SourceLocation EndLoc = + clang::Lexer::getLocForEndOfToken(BinOp->getEndLoc(), 0, SM, LangOpts); + if (EndLoc.isInvalid()) + return; + + Check->diag(StartLoc, + "'%0' has higher precedence than '%1'; add parentheses to " + "explicitly specify the order of operations") + << (Precedence1 > Precedence2 ? BinOp->getOpcodeStr() + : ParentBinOp->getOpcodeStr()) + << (Precedence1 > Precedence2 ? 
ParentBinOp->getOpcodeStr() + : BinOp->getOpcodeStr()) + << FixItHint::CreateInsertion(StartLoc, "(") + << FixItHint::CreateInsertion(EndLoc, ")") + << SourceRange(StartLoc, EndLoc); + } + + addParantheses(dyn_cast(BinOp->getLHS()->IgnoreImpCasts()), + BinOp, Check, SM, LangOpts); + addParantheses(dyn_cast(BinOp->getRHS()->IgnoreImpCasts()), + BinOp, Check, SM, LangOpts); +} + +void MathMissingParenthesesCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *BinOp = Result.Nodes.getNodeAs("binOp"); + std::vector< + std::pair>> + Insertions; + const SourceManager &SM = *Result.SourceManager; + const clang::LangOptions &LO = Result.Context->getLangOpts(); + addParantheses(BinOp, nullptr, this, SM, LO); +} + +} // namespace clang::tidy::readability diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h new file mode 100644 index 00000000000000..9a9d2b3cfaabae --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h @@ -0,0 +1,34 @@ +//===--- MathMissingParenthesesCheck.h - clang-tidy -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MATHMISSINGPARENTHESESCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MATHMISSINGPARENTHESESCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::readability { + +/// Check for missing parentheses in mathematical expressions that involve +/// operators of different priorities. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/readability/math-missing-parentheses.html +class MathMissingParenthesesCheck : public ClangTidyCheck { +public: + MathMissingParenthesesCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } +}; + +} // namespace clang::tidy::readability + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MATHMISSINGPARENTHESESCHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 376b84683df74e..d61c0ba39658e5 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -32,6 +32,7 @@ #include "IsolateDeclarationCheck.h" #include "MagicNumbersCheck.h" #include "MakeMemberFunctionConstCheck.h" +#include "MathMissingParenthesesCheck.h" #include "MisleadingIndentationCheck.h" #include "MisplacedArrayIndexCheck.h" #include "NamedParameterCheck.h" @@ -105,6 +106,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-identifier-naming"); CheckFactories.registerCheck( "readability-implicit-bool-conversion"); + CheckFactories.registerCheck( + "readability-math-missing-parentheses"); CheckFactories.registerCheck( "readability-redundant-inline-specifier"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.cpp b/clang-tools-extra/clangd/CodeCompletionStrings.cpp index 2075e5965f181e..9b4442b0bb76fd 100644 --- a/clang-tools-extra/clangd/CodeCompletionStrings.cpp +++ b/clang-tools-extra/clangd/CodeCompletionStrings.cpp @@ -253,7 +253,7 @@ void getSignature(const 
CodeCompletionString &CCS, std::string *Signature, if (!IncludeFunctionArguments && ResultKind == CodeCompletionResult::RK_Declaration) TruncateSnippetAt.emplace(Snippet->size()); - LLVM_FALLTHROUGH; + [[fallthrough]]; case CodeCompletionString::CK_RightParen: case CodeCompletionString::CK_LeftBracket: case CodeCompletionString::CK_RightBracket: diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 5b1feffb89ea06..2867fc95803048 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -131,6 +131,12 @@ New checks to reading out-of-bounds data due to inadequate or incorrect string null termination. +- New :doc:`modernize-min-max-use-initializer-list + ` check. + + Replaces nested ``std::min`` and ``std::max`` calls with an initializer list + where applicable. + - New :doc:`modernize-use-designated-initializers ` check. @@ -143,6 +149,12 @@ New checks Enforces consistent style for enumerators' initialization, covering three styles: none, first only, or all initialized explicitly. +- New :doc:`readability-math-missing-parentheses + ` check. + + Check for missing parentheses in mathematical expressions that involve + operators of different priorities. + - New :doc:`readability-use-std-min-max ` check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 5d9d487f75f9cb..49747ff896ba5c 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -276,6 +276,7 @@ Clang-Tidy Checks :doc:`modernize-macro-to-enum `, "Yes" :doc:`modernize-make-shared `, "Yes" :doc:`modernize-make-unique `, "Yes" + :doc:`modernize-min-max-use-initializer-list `, "Yes" :doc:`modernize-pass-by-value `, "Yes" :doc:`modernize-raw-string-literal `, "Yes" :doc:`modernize-redundant-void-arg `, "Yes" @@ -363,6 +364,7 @@ Clang-Tidy Checks :doc:`readability-isolate-declaration `, "Yes" :doc:`readability-magic-numbers `, :doc:`readability-make-member-function-const `, "Yes" + :doc:`readability-math-missing-parentheses `, "Yes" :doc:`readability-misleading-indentation `, :doc:`readability-misplaced-array-index `, "Yes" :doc:`readability-named-parameter `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/min-max-use-initializer-list.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/min-max-use-initializer-list.rst new file mode 100644 index 00000000000000..d6721a25629b05 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/min-max-use-initializer-list.rst @@ -0,0 +1,50 @@ +.. title:: clang-tidy - modernize-min-max-use-initializer-list + +modernize-min-max-use-initializer-list +====================================== + +Replaces nested ``std::min`` and ``std::max`` calls with an initializer list +where applicable. + +For instance, consider the following code: + +.. code-block:: cpp + + int a = std::max(std::max(i, j), k); + +The check will transform the above code to: + +.. 
code-block:: cpp + + int a = std::max({i, j, k}); + +Performance Considerations +========================== + +While this check simplifies the code and makes it more readable, it may cause +performance degradation for non-trivial types due to the need to copy objects +into the initializer list. + +To avoid this, it is recommended to use `std::ref` or `std::cref` for +non-trivial types: + +.. code-block:: cpp + + std::string b = std::max({std::ref(i), std::ref(j), std::ref(k)}); + +Options +======= + +.. option:: IncludeStyle + + A string specifying which include-style is used, `llvm` or `google`. Default + is `llvm`. + +.. option:: IgnoreNonTrivialTypes + + A boolean specifying whether to ignore non-trivial types. Default is `true`. + +.. option:: IgnoreTrivialTypesOfSizeAbove + + An integer specifying the size (in bytes) above which trivial types are + ignored. Default is `32`. \ No newline at end of file diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/math-missing-parentheses.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/math-missing-parentheses.rst new file mode 100644 index 00000000000000..21d66daab334c6 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/math-missing-parentheses.rst @@ -0,0 +1,27 @@ +.. title:: clang-tidy - readability-math-missing-parentheses + +readability-math-missing-parentheses +==================================== + +Check for missing parentheses in mathematical expressions that involve operators +of different priorities. + +Parentheses in mathematical expressions clarify the order +of operations, especially with different-priority operators. Lengthy or multiline +expressions can obscure this order, leading to coding errors. IDEs can aid clarity +by highlighting parentheses. Explicitly using parentheses also clarifies what the +developer had in mind when writing the expression. Ensuring their presence reduces +ambiguity and errors, promoting clearer and more maintainable code. 
+ +Before: + +.. code-block:: c++ + + int x = 1 + 2 * 3 - 4 / 5; + + +After: + +.. code-block:: c++ + + int x = 1 + (2 * 3) - (4 / 5); \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/min-max-use-initializer-list.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/min-max-use-initializer-list.cpp new file mode 100644 index 00000000000000..51ab9bda975f10 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/min-max-use-initializer-list.cpp @@ -0,0 +1,305 @@ +// RUN: %check_clang_tidy %s modernize-min-max-use-initializer-list %t + +// CHECK-FIXES: #include +namespace utils { +template +T max(T a, T b) { + return (a < b) ? b : a; +} +} // namespace utils + +namespace std { +template< class T > +struct initializer_list { + initializer_list()=default; + initializer_list(T*,int){} + const T* begin() const {return nullptr;} + const T* end() const {return nullptr;} +}; + +template +ForwardIt min_element(ForwardIt first, ForwardIt last) +{ + if (first == last) + return last; + + ForwardIt smallest = first; + + while (++first != last) + if (*first < *smallest) + smallest = first; + + return smallest; +} + +template +ForwardIt min_element(ForwardIt first, ForwardIt last, Compare comp) +{ + if (first == last) + return last; + + ForwardIt smallest = first; + + while (++first != last) + if (comp(*first, *smallest)) + smallest = first; + + return smallest; +} + +template +ForwardIt max_element(ForwardIt first, ForwardIt last) +{ + if (first == last) + return last; + + ForwardIt largest = first; + + while (++first != last) + if (*largest < *first) + largest = first; + + return largest; +} + +template +ForwardIt max_element(ForwardIt first, ForwardIt last, Compare comp) +{ + if (first == last) + return last; + + ForwardIt largest = first; + + while(++first != last) + if (comp(*largest, *first)) + largest = first; + + return largest; +} + +template< class T > +const T& max( const T& a, const T& b ) { + return (a 
< b) ? b : a; +}; + +template< class T > +T max(std::initializer_list ilist) +{ + return *std::max_element(ilist.begin(), ilist.end()); +} + +template< class T, class Compare > +const T& max( const T& a, const T& b, Compare comp ) { + return (comp(a, b)) ? b : a; +}; + +template< class T, class Compare > +T max(std::initializer_list ilist, Compare comp) { + return *std::max_element(ilist.begin(), ilist.end(), comp); +}; + +template< class T > +const T& min( const T& a, const T& b ) { + return (b < a) ? b : a; +}; + +template< class T > +T min(std::initializer_list ilist) +{ + return *std::min_element(ilist.begin(), ilist.end()); +} + + +template< class T, class Compare > +const T& min( const T& a, const T& b, Compare comp ) { + return (comp(b, a)) ? b : a; +}; + +template< class T, class Compare > +T min(std::initializer_list ilist, Compare comp) { + return *std::min_element(ilist.begin(), ilist.end(), comp); +}; + +} // namespace std + +using namespace std; + +namespace { +bool fless_than(int a, int b) { +return a < b; +} + +bool fgreater_than(int a, int b) { +return a > b; +} +auto less_than = [](int a, int b) { return a < b; }; +auto greater_than = [](int a, int b) { return a > b; }; + +int max1 = std::max(1, std::max(2, 3)); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max1 = std::max({1, 2, 3}); + +int min1 = std::min(1, std::min(2, 3)); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min1 = std::min({1, 2, 3}); + +int max2 = std::max(1, std::max(2, std::max(3, 4))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2 = std::max({1, 2, 3, 4}); + +int max2b = 
std::max(std::max(std::max(1, 2), std::max(3, 4)), std::max(std::max(5, 6), std::max(7, 8))); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2b = std::max({1, 2, 3, 4, 5, 6, 7, 8}); + +int max2c = std::max(std::max(1, std::max(2, 3)), 4); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2c = std::max({1, 2, 3, 4}); + +int max2d = std::max(std::max({1, 2, 3}), 4); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2d = std::max({1, 2, 3, 4}); + + +int max2e = std::max(1, max(2, max(3, 4))); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2e = std::max({1, 2, 3, 4}); + +int min2 = std::min(1, std::min(2, std::min(3, 4))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min2 = std::min({1, 2, 3, 4}); + +int max3 = std::max(std::max(4, 5), std::min(2, std::min(3, 1))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-MESSAGES: :[[@LINE-2]]:37: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max3 = std::max({4, 5, std::min({2, 3, 1})}); + +int min3 = std::min(std::min(4, 5), std::max(2, std::max(3, 1))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead 
[modernize-min-max-use-initializer-list] +// CHECK-MESSAGES: :[[@LINE-2]]:37: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min3 = std::min({4, 5, std::max({2, 3, 1})}); + +int max4 = std::max(1, std::max(2, 3, greater_than), less_than); +// CHECK-FIXES: int max4 = std::max(1, std::max(2, 3, greater_than), less_than); + +int min4 = std::min(1, std::min(2, 3, greater_than), less_than); +// CHECK-FIXES: int min4 = std::min(1, std::min(2, 3, greater_than), less_than); + +int max5 = std::max(1, std::max(2, 3), less_than); +// CHECK-FIXES: int max5 = std::max(1, std::max(2, 3), less_than); + +int min5 = std::min(1, std::min(2, 3), less_than); +// CHECK-FIXES: int min5 = std::min(1, std::min(2, 3), less_than); + +int max6 = std::max(1, std::max(2, 3, greater_than), greater_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max6 = std::max({1, 2, 3 }, greater_than); + +int min6 = std::min(1, std::min(2, 3, greater_than), greater_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min6 = std::min({1, 2, 3 }, greater_than); + +int max7 = std::max(1, std::max(2, 3, fless_than), fgreater_than); +// CHECK-FIXES: int max7 = std::max(1, std::max(2, 3, fless_than), fgreater_than); + +int min7 = std::min(1, std::min(2, 3, fless_than), fgreater_than); +// CHECK-FIXES: int min7 = std::min(1, std::min(2, 3, fless_than), fgreater_than); + +int max8 = std::max(1, std::max(2, 3, fless_than), less_than); +// CHECK-FIXES: int max8 = std::max(1, std::max(2, 3, fless_than), less_than) + +int min8 = std::min(1, std::min(2, 3, fless_than), less_than); +// CHECK-FIXES: int min8 = std::min(1, std::min(2, 3, fless_than), less_than); + +int max9 = 
std::max(1, std::max(2, 3, fless_than), fless_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max9 = std::max({1, 2, 3 }, fless_than); + +int min9 = std::min(1, std::min(2, 3, fless_than), fless_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min9 = std::min({1, 2, 3 }, fless_than); + +int min10 = std::min(std::min(4, 5), std::max(2, utils::max(3, 1))); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min10 = std::min({4, 5, std::max(2, utils::max(3, 1))}); + +int max10 = std::max({std::max(1, 2), std::max({5, 6, 1}), 2, std::min({1, 2, 4})}); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max10 = std::max({1, 2, 5, 6, 1, 2, std::min({1, 2, 4})}); + +int typecastTest = std::max(std::max(0U, 0.0f), 0); +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int typecastTest = std::max({static_cast(0U), static_cast(0.0f), 0}); + +int typecastTest1 = std::max(std::max(0U, 0.0f), 0L); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int typecastTest1 = std::max({static_cast(0U), static_cast(0.0f), 0L}); + +int typecastTest2 = std::max(std::max(10U, 20.0f), 30); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// 
CHECK-FIXES: int typecastTest2 = std::max({static_cast(10U), static_cast(20.0f), 30}); + +int typecastTest3 = std::max(std::max(0U, std::max(0.0f, 1.0f)), 0); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int typecastTest3 = std::max({static_cast(0U), static_cast(0.0f), static_cast(1.0f), 0}); + +#define max3f(a, b, c) std::max(a, std::max(b, c)) +// CHECK-FIXES: #define max3f(a, b, c) std::max(a, std::max(b, c)) + +#define value 4545 +int macroVarMax = std::max(value, std::max(1, 2)); +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int macroVarMax = std::max({value, 1, 2}); + +#define value2 45U +int macroVarMax2 = std::max(1, std::max(value2, 2.0f)); +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int macroVarMax2 = std::max({1, static_cast(value2), static_cast(2.0f)}); + +// True-negative tests +int maxTN1 = std::max(1, 2); +// CHECK-FIXES: int maxTN1 = std::max(1, 2); + +int maxTN2 = std::max({1, 2, 3}); +// CHECK-FIXES: int maxTN2 = std::max({1, 2, 3}); + +int maxTN3 = std::max({1, 2, 3}, less_than); +// CHECK-FIXES: int maxTN3 = std::max({1, 2, 3}, less_than); + +// non-trivial types +struct A { + int a; + A(int a) : a(a) {} + bool operator<(const A &rhs) const { return a < rhs.a; } +}; + +A maxNT1 = std::max(A(1), A(2)); +// CHECK-FIXES: A maxNT1 = std::max(A(1), A(2)); + +A maxNT2 = std::max(A(1), std::max(A(2), A(3))); +// CHECK-FIXES: A maxNT2 = std::max(A(1), std::max(A(2), A(3))); + +A maxNT3 = std::max(A(1), std::max(A(2), A(3)), [](const A &lhs, const A &rhs) { return lhs.a < rhs.a; }); +// CHECK-FIXES: A maxNT3 = std::max(A(1), std::max(A(2), A(3)), [](const A &lhs, const A &rhs) { 
return lhs.a < rhs.a; }); + +// Trivial type with size greater than 32 +struct B { + // 9*4 = 36 bytes > 32 bytes + int a[9]; + + bool operator<(const B& rhs) const { + return a[0] < rhs.a[0]; + } +}; + +B maxTT1 = std::max(B(), B()); +// CHECK-FIXES: B maxTT1 = std::max(B(), B()); + +B maxTT2 = std::max(B(), std::max(B(), B())); +// CHECK-FIXES: B maxTT2 = std::max(B(), std::max(B(), B())); + +B maxTT3 = std::max(B(), std::max(B(), B()), [](const B &lhs, const B &rhs) { return lhs.a[0] < rhs.a[0]; }); +// CHECK-FIXES: B maxTT3 = std::max(B(), std::max(B(), B()), [](const B &lhs, const B &rhs) { return lhs.a[0] < rhs.a[0]; }); + + +} // namespace + diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp new file mode 100644 index 00000000000000..edbe2e1c37c770 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp @@ -0,0 +1,120 @@ +// RUN: %check_clang_tidy %s readability-math-missing-parentheses %t + +#define MACRO_AND & +#define MACRO_ADD + +#define MACRO_OR | +#define MACRO_MULTIPLY * +#define MACRO_XOR ^ +#define MACRO_SUBTRACT - +#define MACRO_DIVIDE / + +int foo(){ + return 5; +} + +int bar(){ + return 4; +} + +class fun{ +public: + int A; + double B; + fun(){ + A = 5; + B = 5.4; + } +}; + +void f(){ + //CHECK-MESSAGES: :[[@LINE+2]]:17: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int a = 1 + (2 * 3); + int a = 1 + 2 * 3; + + int a_negative = 1 + (2 * 3); // No warning + + int b = 1 + 2 + 3; // No warning + + int c = 1 * 2 * 3; // No warning + + //CHECK-MESSAGES: :[[@LINE+3]]:17: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:25: warning: 
'/' has higher precedence than '-'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int d = 1 + (2 * 3) - (4 / 5); + int d = 1 + 2 * 3 - 4 / 5; + + int d_negative = 1 + (2 * 3) - (4 / 5); // No warning + + //CHECK-MESSAGES: :[[@LINE+4]]:13: warning: '&' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+3]]:17: warning: '+' has higher precedence than '&'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:25: warning: '*' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int e = (1 & (2 + 3)) | (4 * 5); + int e = 1 & 2 + 3 | 4 * 5; + + int e_negative = (1 & (2 + 3)) | (4 * 5); // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int f = (1 * -2) + 4; + int f = 1 * -2 + 4; + + int f_negative = (1 * -2) + 4; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int g = (1 * 2 * 3) + 4 + 5; + int g = 1 * 2 * 3 + 4 + 5; + + int g_negative = (1 * 2 * 3) + 4 + 5; // No warning + + //CHECK-MESSAGES: :[[@LINE+4]]:13: warning: '&' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+3]]:19: warning: '+' has higher precedence than '&'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:27: warning: '*' has 
higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int h = (120 & (2 + 3)) | (22 * 5); + int h = 120 & 2 + 3 | 22 * 5; + + int h_negative = (120 & (2 + 3)) | (22 * 5); // No warning + + int i = 1 & 2 & 3; // No warning + + int j = 1 | 2 | 3; // No warning + + int k = 1 ^ 2 ^ 3; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '+' has higher precedence than '^'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int l = (1 + 2) ^ 3; + int l = 1 + 2 ^ 3; + + int l_negative = (1 + 2) ^ 3; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int m = (2 * foo()) + bar(); + int m = 2 * foo() + bar(); + + int m_negative = (2 * foo()) + bar(); // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int n = (1.05 * foo()) + double(bar()); + int n = 1.05 * foo() + double(bar()); + + int n_negative = (1.05 * foo()) + double(bar()); // No warning + + //CHECK-MESSAGES: :[[@LINE+3]]:17: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int o = 1 + (obj.A * 3) + obj.B; + fun obj; + int o = 1 + obj.A * 3 + obj.B; + + int o_negative = 1 + (obj.A * 3) + obj.B; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:18: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int p = 1U + (2 * 3); + int p = 1U + 2 * 3; + + int p_negative = 1U + (2 * 3); // No warning + + 
//CHECK-MESSAGES: :[[@LINE+7]]:13: warning: '+' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+6]]:25: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+5]]:53: warning: '&' has higher precedence than '^'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+4]]:53: warning: '^' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+3]]:77: warning: '-' has higher precedence than '^'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:94: warning: '/' has higher precedence than '-'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int q = (1 MACRO_ADD (2 MACRO_MULTIPLY 3)) MACRO_OR ((4 MACRO_AND 5) MACRO_XOR (6 MACRO_SUBTRACT (7 MACRO_DIVIDE 8))); + int q = 1 MACRO_ADD 2 MACRO_MULTIPLY 3 MACRO_OR 4 MACRO_AND 5 MACRO_XOR 6 MACRO_SUBTRACT 7 MACRO_DIVIDE 8; // No warning +} diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake index fa972636553f1f..c164d5497275f3 100644 --- a/clang/cmake/caches/Release.cmake +++ b/clang/cmake/caches/Release.cmake @@ -1,95 +1,93 @@ # Plain options configure the first build. # BOOTSTRAP_* options configure the second build. # BOOTSTRAP_BOOTSTRAP_* options configure the third build. 
+# PGO Builds have 3 stages (stage1, stage2-instrumented, stage2) +# non-PGO Builds have 2 stages (stage1, stage2) -# General Options + +function (set_final_stage_var name value type) + if (LLVM_RELEASE_ENABLE_PGO) + set(BOOTSTRAP_BOOTSTRAP_${name} ${value} CACHE ${type} "") + else() + set(BOOTSTRAP_${name} ${value} CACHE ${type} "") + endif() +endfunction() + +function (set_instrument_and_final_stage_var name value type) + # This sets the variable for the final stage in non-PGO builds and in + # the stage2-instrumented stage for PGO builds. + set(BOOTSTRAP_${name} ${value} CACHE ${type} "") + if (LLVM_RELEASE_ENABLE_PGO) + # Set the variable in the final stage for PGO builds. + set(BOOTSTRAP_BOOTSTRAP_${name} ${value} CACHE ${type} "") + endif() +endfunction() + +# General Options: +# If you want to override any of the LLVM_RELEASE_* variables you can set them +# on the command line via -D, but you need to do this before you pass this +# cache file to CMake via -C. e.g. +# +# cmake -D LLVM_RELEASE_ENABLE_PGO=ON -C Release.cmake set(LLVM_RELEASE_ENABLE_LTO THIN CACHE STRING "") set(LLVM_RELEASE_ENABLE_PGO OFF CACHE BOOL "") - +set(LLVM_RELEASE_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +set(LLVM_RELEASE_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "") +# Note we don't need to add install here, since it is one of the pre-defined +# steps.
+set(LLVM_RELEASE_FINAL_STAGE_TARGETS "clang;package;check-all;check-llvm;check-clang" CACHE STRING "") set(CMAKE_BUILD_TYPE RELEASE CACHE STRING "") -# Stage 1 Bootstrap Setup +# Stage 1 Options +set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") + +set(STAGE1_PROJECTS "clang") +set(STAGE1_RUNTIMES "") + if (LLVM_RELEASE_ENABLE_PGO) + list(APPEND STAGE1_PROJECTS "lld") + list(APPEND STAGE1_RUNTIMES "compiler-rt") set(CLANG_BOOTSTRAP_TARGETS generate-profdata - stage2 stage2-package stage2-clang - stage2-distribution stage2-install - stage2-install-distribution - stage2-install-distribution-toolchain stage2-check-all stage2-check-llvm - stage2-check-clang - stage2-test-suite CACHE STRING "") -else() - set(CLANG_BOOTSTRAP_TARGETS - clang - check-all - check-llvm - check-clang - test-suite - stage3 - stage3-clang - stage3-check-all - stage3-check-llvm - stage3-check-clang - stage3-install - stage3-test-suite CACHE STRING "") -endif() + stage2-check-clang CACHE STRING "") -# Stage 1 Options -set(STAGE1_PROJECTS "clang") -set(STAGE1_RUNTIMES "") + # Configuration for stage2-instrumented + set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE STRING "") + # This enables the build targets for the final stage which is called stage2. 
+ set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS ${LLVM_RELEASE_FINAL_STAGE_TARGETS} CACHE STRING "") + set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE STRING "") + set(BOOTSTRAP_LLVM_ENABLE_RUNTIMES "compiler-rt" CACHE STRING "") + set(BOOTSTRAP_LLVM_ENABLE_PROJECTS "clang;lld" CACHE STRING "") -if (LLVM_RELEASE_ENABLE_PGO) - list(APPEND STAGE1_PROJECTS "lld") - list(APPEND STAGE1_RUNTIMES "compiler-rt") +else() + if (LLVM_RELEASE_ENABLE_LTO) + list(APPEND STAGE1_PROJECTS "lld") + endif() + # Any targets added here will be given the target name stage2-${target}, so + # if you want to run them you can just use: + # ninja -C $BUILDDIR stage2-${target} + set(CLANG_BOOTSTRAP_TARGETS ${LLVM_RELEASE_FINAL_STAGE_TARGETS} CACHE STRING "") endif() +# Stage 1 Common Config set(LLVM_ENABLE_RUNTIMES ${STAGE1_RUNTIMES} CACHE STRING "") set(LLVM_ENABLE_PROJECTS ${STAGE1_PROJECTS} CACHE STRING "") -set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") - -# Stage 2 Bootstrap Setup -set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE STRING "") -set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS - clang - package - check-all - check-llvm - check-clang CACHE STRING "") - -# Stage 2 Options -set(STAGE2_PROJECTS "clang") -set(STAGE2_RUNTIMES "") - -if (LLVM_RELEASE_ENABLE_LTO OR LLVM_RELEASE_ENABLE_PGO) - list(APPEND STAGE2_PROJECTS "lld") -endif() - -if (LLVM_RELEASE_ENABLE_PGO) - set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE STRING "") - list(APPEND STAGE2_RUNTIMES "compiler-rt") - set(BOOTSTRAP_LLVM_ENABLE_LTO ${LLVM_RELEASE_ENABLE_LTO}) - if (LLVM_RELEASE_ENABLE_LTO) - set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "") - endif() +# stage2-instrumented and Final Stage Config: +# Options that need to be set in both the instrumented stage (if we are doing +# a pgo build) and the final stage. 
+set_instrument_and_final_stage_var(LLVM_ENABLE_LTO "${LLVM_RELEASE_ENABLE_LTO}" STRING) +if (LLVM_RELEASE_ENABLE_LTO) + set_instrument_and_final_stage_var(LLVM_ENABLE_LLD "ON" BOOL) endif() -set(BOOTSTRAP_LLVM_ENABLE_PROJECTS ${STAGE2_PROJECTS} CACHE STRING "") -set(BOOTSTRAP_LLVM_ENABLE_RUNTIMES ${STAGE2_RUNTIMES} CACHE STRING "") -if (NOT LLVM_RELEASE_ENABLE_PGO) - set(BOOTSTRAP_LLVM_TARGETS_TO_BUILD Native CACHE STRING "") -endif() +# Final Stage Config (stage2) +set_final_stage_var(LLVM_ENABLE_RUNTIMES "${LLVM_RELEASE_ENABLE_RUNTIMES}" STRING) +set_final_stage_var(LLVM_ENABLE_PROJECTS "${LLVM_RELEASE_ENABLE_PROJECTS}" STRING) -# Stage 3 Options -set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") -set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "") -set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LTO ${LLVM_RELEASE_ENABLE_LTO} CACHE STRING "") -if (LLVM_RELEASE_ENABLE_LTO) - set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "") -endif() diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c951282e76c3b2..1e091f39564a58 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -212,6 +212,16 @@ Non-comprehensive list of changes in this release - ``__typeof_unqual__`` is available in all C modes as an extension, which behaves like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``. + +* Shared libraries linked with either the ``-ffast-math``, ``-Ofast``, or + ``-funsafe-math-optimizations`` flags will no longer enable flush-to-zero + floating-point mode by default. This decision can be overridden with use of + ``-mdaz-ftz``. This behavior now matches GCC's behavior. + (`#57589 `_) + +* ``-fdenormal-fp-math=preserve-sign`` is no longer implied by ``-ffast-math`` + on x86 systems. 
+ New Compiler Flags ------------------ - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 8df40566fcba3d..d0326f01d251e0 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1506,7 +1506,8 @@ floating point semantic models: precise (the default), strict, and fast. * ``-ffp-contract=fast`` - Note: ``-ffast-math`` causes ``crtfastmath.o`` to be linked with code. See + Note: ``-ffast-math`` causes ``crtfastmath.o`` to be linked with code unless + ``-shared`` or ``-mno-daz-ftz`` is present. See :ref:`crtfastmath.o` for more details. .. option:: -fno-fast-math @@ -1560,7 +1561,8 @@ floating point semantic models: precise (the default), strict, and fast. ``-ffp-contract``. Note: ``-fno-fast-math`` implies ``-fdenormal-fp-math=ieee``. - ``-fno-fast-math`` causes ``crtfastmath.o`` to not be linked with code. + ``-fno-fast-math`` causes ``crtfastmath.o`` to not be linked with code + unless ``-mdaz-ftz`` is present. .. option:: -fdenormal-fp-math= @@ -1938,10 +1940,13 @@ by using ``#pragma STDC FENV_ROUND`` with a value other than ``FE_DYNAMIC``. A note about ``crtfastmath.o`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``-ffast-math`` and ``-funsafe-math-optimizations`` cause ``crtfastmath.o`` to be -automatically linked, which adds a static constructor that sets the FTZ/DAZ +``-ffast-math`` and ``-funsafe-math-optimizations`` without the ``-shared`` +option cause ``crtfastmath.o`` to be +automatically linked, which adds a static constructor that sets the FTZ/DAZ bits in MXCSR, affecting not only the current compilation unit but all static -and shared libraries included in the program. +and shared libraries included in the program. This decision can be overridden +by using either the flag ``-mdaz-ftz`` or ``-mno-daz-ftz`` to respectively +link or not link ``crtfastmath.o``. .. 
_FLT_EVAL_METHOD: diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 7a8bd985a91fc0..365b607c741179 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -1644,8 +1644,9 @@ enum CXCursorKind { CXCursor_ObjCSelfExpr = 146, /** OpenMP 5.0 [2.1.5, Array Section]. + * OpenACC 3.3 [2.7.1, Data Specification for Data Clauses (Sub Arrays)] */ - CXCursor_OMPArraySectionExpr = 147, + CXCursor_ArraySectionExpr = 147, /** Represents an @available(...) check. */ diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 24388ad5dea5e6..a662d94994ecdb 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1127,7 +1127,8 @@ class ASTContext : public RefCountedBase { CanQualType OCLSamplerTy, OCLEventTy, OCLClkEventTy; CanQualType OCLQueueTy, OCLReserveIDTy; CanQualType IncompleteMatrixIdxTy; - CanQualType OMPArraySectionTy, OMPArrayShapingTy, OMPIteratorTy; + CanQualType ArraySectionTy; + CanQualType OMPArrayShapingTy, OMPIteratorTy; #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ CanQualType Id##Ty; #include "clang/Basic/OpenCLExtensionTypes.def" diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def index c04f6f6f127191..0a36fdc5d9c0f7 100644 --- a/clang/include/clang/AST/BuiltinTypes.def +++ b/clang/include/clang/AST/BuiltinTypes.def @@ -320,7 +320,7 @@ PLACEHOLDER_TYPE(ARCUnbridgedCast, ARCUnbridgedCastTy) PLACEHOLDER_TYPE(IncompleteMatrixIdx, IncompleteMatrixIdxTy) // A placeholder type for OpenMP array sections. -PLACEHOLDER_TYPE(OMPArraySection, OMPArraySectionTy) +PLACEHOLDER_TYPE(ArraySection, ArraySectionTy) // A placeholder type for OpenMP array shaping operation. 
PLACEHOLDER_TYPE(OMPArrayShaping, OMPArrayShapingTy) diff --git a/clang/include/clang/AST/ComputeDependence.h b/clang/include/clang/AST/ComputeDependence.h index 7abf9141237dc8..6d3a51c379f9df 100644 --- a/clang/include/clang/AST/ComputeDependence.h +++ b/clang/include/clang/AST/ComputeDependence.h @@ -94,7 +94,7 @@ class DesignatedInitExpr; class ParenListExpr; class PseudoObjectExpr; class AtomicExpr; -class OMPArraySectionExpr; +class ArraySectionExpr; class OMPArrayShapingExpr; class OMPIteratorExpr; class ObjCArrayLiteral; @@ -189,7 +189,7 @@ ExprDependence computeDependence(ParenListExpr *E); ExprDependence computeDependence(PseudoObjectExpr *E); ExprDependence computeDependence(AtomicExpr *E); -ExprDependence computeDependence(OMPArraySectionExpr *E); +ExprDependence computeDependence(ArraySectionExpr *E); ExprDependence computeDependence(OMPArrayShapingExpr *E); ExprDependence computeDependence(OMPIteratorExpr *E); diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 2bfefeabc348be..f2bf667636dc9b 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -6610,6 +6610,275 @@ class TypoExpr : public Expr { }; +/// This class represents BOTH the OpenMP Array Section and OpenACC 'subarray', +/// with a boolean differentiator. +/// OpenMP 5.0 [2.1.5, Array Sections]. +/// To specify an array section in an OpenMP construct, array subscript +/// expressions are extended with the following syntax: +/// \code +/// [ lower-bound : length : stride ] +/// [ lower-bound : length : ] +/// [ lower-bound : length ] +/// [ lower-bound : : stride ] +/// [ lower-bound : : ] +/// [ lower-bound : ] +/// [ : length : stride ] +/// [ : length : ] +/// [ : length ] +/// [ : : stride ] +/// [ : : ] +/// [ : ] +/// \endcode +/// The array section must be a subset of the original array. +/// Array sections are allowed on multidimensional arrays. 
Base language array +/// subscript expressions can be used to specify length-one dimensions of +/// multidimensional array sections. +/// Each of the lower-bound, length, and stride expressions if specified must be +/// an integral type expressions of the base language. When evaluated +/// they represent a set of integer values as follows: +/// \code +/// { lower-bound, lower-bound + stride, lower-bound + 2 * stride,... , +/// lower-bound + ((length - 1) * stride) } +/// \endcode +/// The lower-bound and length must evaluate to non-negative integers. +/// The stride must evaluate to a positive integer. +/// When the size of the array dimension is not known, the length must be +/// specified explicitly. +/// When the stride is absent it defaults to 1. +/// When the length is absent it defaults to ⌈(size − lower-bound)/stride⌉, +/// where size is the size of the array dimension. When the lower-bound is +/// absent it defaults to 0. +/// +/// +/// OpenACC 3.3 [2.7.1 Data Specification in Data Clauses] +/// In C and C++, a subarray is an array name followed by an extended array +/// range specification in brackets, with start and length, such as +/// +/// AA[2:n] +/// +/// If the lower bound is missing, zero is used. If the length is missing and +/// the array has known size, the size of the array is used; otherwise the +/// length is required. The subarray AA[2:n] means elements AA[2], AA[3], . . . +/// , AA[2+n-1]. In C and C++, a two dimensional array may be declared in at +/// least four ways: +/// +/// -Statically-sized array: float AA[100][200]; +/// -Pointer to statically sized rows: typedef float row[200]; row* BB; +/// -Statically-sized array of pointers: float* CC[200]; +/// -Pointer to pointers: float** DD; +/// +/// Each dimension may be statically sized, or a pointer to dynamically +/// allocated memory. 
Each of these may be included in a data clause using +/// subarray notation to specify a rectangular array: +/// +/// -AA[2:n][0:200] +/// -BB[2:n][0:m] +/// -CC[2:n][0:m] +/// -DD[2:n][0:m] +/// +/// Multidimensional rectangular subarrays in C and C++ may be specified for any +/// array with any combination of statically-sized or dynamically-allocated +/// dimensions. For statically sized dimensions, all dimensions except the first +/// must specify the whole extent to preserve the contiguous data restriction, +/// discussed below. For dynamically allocated dimensions, the implementation +/// will allocate pointers in device memory corresponding to the pointers in +/// local memory and will fill in those pointers as appropriate. +/// +/// In Fortran, a subarray is an array name followed by a comma-separated list +/// of range specifications in parentheses, with lower and upper bound +/// subscripts, such as +/// +/// arr(1:high,low:100) +/// +/// If either the lower or upper bounds are missing, the declared or allocated +/// bounds of the array, if known, are used. All dimensions except the last must +/// specify the whole extent, to preserve the contiguous data restriction, +/// discussed below. +/// +/// Restrictions +/// +/// -In Fortran, the upper bound for the last dimension of an assumed-size dummy +/// array must be specified. +/// +/// -In C and C++, the length for dynamically allocated dimensions of an array +/// must be explicitly specified. +/// +/// -In C and C++, modifying pointers in pointer arrays during the data +/// lifetime, either on the host or on the device, may result in undefined +/// behavior. +/// +/// -If a subarray appears in a data clause, the implementation may choose to +/// allocate memory for only that subarray on the accelerator. +/// +/// -In Fortran, array pointers may appear, but pointer association is not +/// preserved in device memory. 
+/// +/// -Any array or subarray in a data clause, including Fortran array pointers, +/// must be a contiguous section of memory, except for dynamic multidimensional +/// C arrays. +/// +/// -In C and C++, if a variable or array of composite type appears, all the +/// data members of the struct or class are allocated and copied, as +/// appropriate. If a composite member is a pointer type, the data addressed by +/// that pointer are not implicitly copied. +/// +/// -In Fortran, if a variable or array of composite type appears, all the +/// members of that derived type are allocated and copied, as appropriate. If +/// any member has the allocatable or pointer attribute, the data accessed +/// through that member are not copied. +/// +/// -If an expression is used in a subscript or subarray expression in a clause +/// on a data construct, the same value is used when copying data at the end of +/// the data region, even if the values of variables in the expression change +/// during the data region. +class ArraySectionExpr : public Expr { + friend class ASTStmtReader; + friend class ASTStmtWriter; + +public: + enum ArraySectionType { OMPArraySection, OpenACCArraySection }; + +private: + enum { + BASE, + LOWER_BOUND, + LENGTH, + STRIDE, + END_EXPR, + OPENACC_END_EXPR = STRIDE + }; + + ArraySectionType ASType = OMPArraySection; + Stmt *SubExprs[END_EXPR] = {nullptr}; + SourceLocation ColonLocFirst; + SourceLocation ColonLocSecond; + SourceLocation RBracketLoc; + +public: + // Constructor for OMP array sections, which include a 'stride'. 
+ ArraySectionExpr(Expr *Base, Expr *LowerBound, Expr *Length, Expr *Stride, + QualType Type, ExprValueKind VK, ExprObjectKind OK, + SourceLocation ColonLocFirst, SourceLocation ColonLocSecond, + SourceLocation RBracketLoc) + : Expr(ArraySectionExprClass, Type, VK, OK), ASType(OMPArraySection), + ColonLocFirst(ColonLocFirst), ColonLocSecond(ColonLocSecond), + RBracketLoc(RBracketLoc) { + setBase(Base); + setLowerBound(LowerBound); + setLength(Length); + setStride(Stride); + setDependence(computeDependence(this)); + } + + // Constructor for OpenACC sub-arrays, which do not permit a 'stride'. + ArraySectionExpr(Expr *Base, Expr *LowerBound, Expr *Length, QualType Type, + ExprValueKind VK, ExprObjectKind OK, SourceLocation ColonLoc, + SourceLocation RBracketLoc) + : Expr(ArraySectionExprClass, Type, VK, OK), ASType(OpenACCArraySection), + ColonLocFirst(ColonLoc), RBracketLoc(RBracketLoc) { + setBase(Base); + setLowerBound(LowerBound); + setLength(Length); + setDependence(computeDependence(this)); + } + + /// Create an empty array section expression. + explicit ArraySectionExpr(EmptyShell Shell) + : Expr(ArraySectionExprClass, Shell) {} + + /// Return original type of the base expression for array section. + static QualType getBaseOriginalType(const Expr *Base); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == ArraySectionExprClass; + } + + bool isOMPArraySection() const { return ASType == OMPArraySection; } + bool isOpenACCArraySection() const { return ASType == OpenACCArraySection; } + + /// Get base of the array section. + Expr *getBase() { return cast(SubExprs[BASE]); } + const Expr *getBase() const { return cast(SubExprs[BASE]); } + + /// Get lower bound of array section. + Expr *getLowerBound() { return cast_or_null(SubExprs[LOWER_BOUND]); } + const Expr *getLowerBound() const { + return cast_or_null(SubExprs[LOWER_BOUND]); + } + + /// Get length of array section. 
+ Expr *getLength() { return cast_or_null(SubExprs[LENGTH]); } + const Expr *getLength() const { return cast_or_null(SubExprs[LENGTH]); } + + /// Get stride of array section. + Expr *getStride() { + assert(ASType != OpenACCArraySection && + "Stride not valid in OpenACC subarrays"); + return cast_or_null(SubExprs[STRIDE]); + } + + const Expr *getStride() const { + assert(ASType != OpenACCArraySection && + "Stride not valid in OpenACC subarrays"); + return cast_or_null(SubExprs[STRIDE]); + } + + SourceLocation getBeginLoc() const LLVM_READONLY { + return getBase()->getBeginLoc(); + } + SourceLocation getEndLoc() const LLVM_READONLY { return RBracketLoc; } + + SourceLocation getColonLocFirst() const { return ColonLocFirst; } + SourceLocation getColonLocSecond() const { + assert(ASType != OpenACCArraySection && + "second colon for stride not valid in OpenACC subarrays"); + return ColonLocSecond; + } + SourceLocation getRBracketLoc() const { return RBracketLoc; } + + SourceLocation getExprLoc() const LLVM_READONLY { + return getBase()->getExprLoc(); + } + + child_range children() { + return child_range( + &SubExprs[BASE], + &SubExprs[ASType == OMPArraySection ? END_EXPR : OPENACC_END_EXPR]); + } + + const_child_range children() const { + return const_child_range( + &SubExprs[BASE], + &SubExprs[ASType == OMPArraySection ? END_EXPR : OPENACC_END_EXPR]); + } + +private: + /// Set base of the array section. + void setBase(Expr *E) { SubExprs[BASE] = E; } + + /// Set lower bound of the array section. + void setLowerBound(Expr *E) { SubExprs[LOWER_BOUND] = E; } + + /// Set length of the array section. + void setLength(Expr *E) { SubExprs[LENGTH] = E; } + + /// Set length of the array section. 
+ void setStride(Expr *E) { + assert(ASType != OpenACCArraySection && + "Stride not valid in OpenACC subarrays"); + SubExprs[STRIDE] = E; + } + + void setColonLocFirst(SourceLocation L) { ColonLocFirst = L; } + + void setColonLocSecond(SourceLocation L) { + assert(ASType != OpenACCArraySection && + "second colon for stride not valid in OpenACC subarrays"); + ColonLocSecond = L; + } + void setRBracketLoc(SourceLocation L) { RBracketLoc = L; } +}; + /// Frontend produces RecoveryExprs on semantic errors that prevent creating /// other well-formed expressions. E.g. when type-checking of a binary operator /// fails, we cannot produce a BinaryOperator expression. Instead, we can choose diff --git a/clang/include/clang/AST/ExprOpenMP.h b/clang/include/clang/AST/ExprOpenMP.h index be5b1f3fdd112f..54a0c203f656c3 100644 --- a/clang/include/clang/AST/ExprOpenMP.h +++ b/clang/include/clang/AST/ExprOpenMP.h @@ -17,130 +17,6 @@ #include "clang/AST/Expr.h" namespace clang { -/// OpenMP 5.0 [2.1.5, Array Sections]. -/// To specify an array section in an OpenMP construct, array subscript -/// expressions are extended with the following syntax: -/// \code -/// [ lower-bound : length : stride ] -/// [ lower-bound : length : ] -/// [ lower-bound : length ] -/// [ lower-bound : : stride ] -/// [ lower-bound : : ] -/// [ lower-bound : ] -/// [ : length : stride ] -/// [ : length : ] -/// [ : length ] -/// [ : : stride ] -/// [ : : ] -/// [ : ] -/// \endcode -/// The array section must be a subset of the original array. -/// Array sections are allowed on multidimensional arrays. Base language array -/// subscript expressions can be used to specify length-one dimensions of -/// multidimensional array sections. -/// Each of the lower-bound, length, and stride expressions if specified must be -/// an integral type expressions of the base language. 
When evaluated -/// they represent a set of integer values as follows: -/// \code -/// { lower-bound, lower-bound + stride, lower-bound + 2 * stride,... , -/// lower-bound + ((length - 1) * stride) } -/// \endcode -/// The lower-bound and length must evaluate to non-negative integers. -/// The stride must evaluate to a positive integer. -/// When the size of the array dimension is not known, the length must be -/// specified explicitly. -/// When the stride is absent it defaults to 1. -/// When the length is absent it defaults to ⌈(size − lower-bound)/stride⌉, -/// where size is the size of the array dimension. When the lower-bound is -/// absent it defaults to 0. -class OMPArraySectionExpr : public Expr { - enum { BASE, LOWER_BOUND, LENGTH, STRIDE, END_EXPR }; - Stmt *SubExprs[END_EXPR]; - SourceLocation ColonLocFirst; - SourceLocation ColonLocSecond; - SourceLocation RBracketLoc; - -public: - OMPArraySectionExpr(Expr *Base, Expr *LowerBound, Expr *Length, Expr *Stride, - QualType Type, ExprValueKind VK, ExprObjectKind OK, - SourceLocation ColonLocFirst, - SourceLocation ColonLocSecond, SourceLocation RBracketLoc) - : Expr(OMPArraySectionExprClass, Type, VK, OK), - ColonLocFirst(ColonLocFirst), ColonLocSecond(ColonLocSecond), - RBracketLoc(RBracketLoc) { - SubExprs[BASE] = Base; - SubExprs[LOWER_BOUND] = LowerBound; - SubExprs[LENGTH] = Length; - SubExprs[STRIDE] = Stride; - setDependence(computeDependence(this)); - } - - /// Create an empty array section expression. - explicit OMPArraySectionExpr(EmptyShell Shell) - : Expr(OMPArraySectionExprClass, Shell) {} - - /// An array section can be written only as Base[LowerBound:Length]. - - /// Get base of the array section. - Expr *getBase() { return cast(SubExprs[BASE]); } - const Expr *getBase() const { return cast(SubExprs[BASE]); } - /// Set base of the array section. - void setBase(Expr *E) { SubExprs[BASE] = E; } - - /// Return original type of the base expression for array section. 
- static QualType getBaseOriginalType(const Expr *Base); - - /// Get lower bound of array section. - Expr *getLowerBound() { return cast_or_null(SubExprs[LOWER_BOUND]); } - const Expr *getLowerBound() const { - return cast_or_null(SubExprs[LOWER_BOUND]); - } - /// Set lower bound of the array section. - void setLowerBound(Expr *E) { SubExprs[LOWER_BOUND] = E; } - - /// Get length of array section. - Expr *getLength() { return cast_or_null(SubExprs[LENGTH]); } - const Expr *getLength() const { return cast_or_null(SubExprs[LENGTH]); } - /// Set length of the array section. - void setLength(Expr *E) { SubExprs[LENGTH] = E; } - - /// Get stride of array section. - Expr *getStride() { return cast_or_null(SubExprs[STRIDE]); } - const Expr *getStride() const { return cast_or_null(SubExprs[STRIDE]); } - /// Set length of the array section. - void setStride(Expr *E) { SubExprs[STRIDE] = E; } - - SourceLocation getBeginLoc() const LLVM_READONLY { - return getBase()->getBeginLoc(); - } - SourceLocation getEndLoc() const LLVM_READONLY { return RBracketLoc; } - - SourceLocation getColonLocFirst() const { return ColonLocFirst; } - void setColonLocFirst(SourceLocation L) { ColonLocFirst = L; } - - SourceLocation getColonLocSecond() const { return ColonLocSecond; } - void setColonLocSecond(SourceLocation L) { ColonLocSecond = L; } - - SourceLocation getRBracketLoc() const { return RBracketLoc; } - void setRBracketLoc(SourceLocation L) { RBracketLoc = L; } - - SourceLocation getExprLoc() const LLVM_READONLY { - return getBase()->getExprLoc(); - } - - static bool classof(const Stmt *T) { - return T->getStmtClass() == OMPArraySectionExprClass; - } - - child_range children() { - return child_range(&SubExprs[BASE], &SubExprs[END_EXPR]); - } - - const_child_range children() const { - return const_child_range(&SubExprs[BASE], &SubExprs[END_EXPR]); - } -}; - /// An explicit cast in C or a C-style cast in C++, which uses the syntax /// ([s1][s2]...[sn])expr. For example: @c ([3][3])f. 
class OMPArrayShapingExpr final diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 7eb92e304a3856..f9b145b4e86a55 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2740,7 +2740,7 @@ DEF_TRAVERSE_STMT(CXXMemberCallExpr, {}) DEF_TRAVERSE_STMT(AddrLabelExpr, {}) DEF_TRAVERSE_STMT(ArraySubscriptExpr, {}) DEF_TRAVERSE_STMT(MatrixSubscriptExpr, {}) -DEF_TRAVERSE_STMT(OMPArraySectionExpr, {}) +DEF_TRAVERSE_STMT(ArraySectionExpr, {}) DEF_TRAVERSE_STMT(OMPArrayShapingExpr, {}) DEF_TRAVERSE_STMT(OMPIteratorExpr, {}) diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 14b08d4927ec5e..fcffadacc8e631 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -370,4 +370,7 @@ def warn_missing_symbol_graph_dir : Warning< "Missing symbol graph output directory, defaulting to working directory">, InGroup; +def err_ast_action_on_llvm_ir : Error< + "cannot apply AST actions to LLVM IR file '%0'">, + DefaultFatal; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e9c941c1b068b7..e580091f879dde 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11159,7 +11159,7 @@ def err_omp_declare_mapper_redefinition : Error< "redefinition of user-defined mapper for type %0 with name %1">; def err_omp_invalid_mapper: Error< "cannot find a valid user-defined mapper for type %0 with name %1">; -def err_omp_array_section_use : Error<"OpenMP array section is not allowed here">; +def err_array_section_use : Error<"%select{OpenACC sub-array|OpenMP array section}0 is not allowed here">; def err_omp_array_shaping_use : Error<"OpenMP array shaping operation is not allowed here">; def err_omp_iterator_use 
: Error<"OpenMP iterator is not allowed here">; def err_omp_typecheck_section_value : Error< diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 2245fd78bfc9f0..8b4206e52cd482 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -114,6 +114,12 @@ class FileManager : public RefCountedBase { /// unsigned NextFileUID; + /// Statistics gathered during the lifetime of the FileManager. + unsigned NumDirLookups = 0; + unsigned NumFileLookups = 0; + unsigned NumDirCacheMisses = 0; + unsigned NumFileCacheMisses = 0; + // Caching. std::unique_ptr StatCache; @@ -341,6 +347,10 @@ class FileManager : public RefCountedBase { public: void PrintStats() const; + + /// Import statistics from a child FileManager and add them to this current + /// FileManager. + void AddStats(const FileManager &Other); }; } // end namespace clang diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index b4e3ae573b95e6..305f19daa4a923 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -71,7 +71,7 @@ def OffsetOfExpr : StmtNode; def UnaryExprOrTypeTraitExpr : StmtNode; def ArraySubscriptExpr : StmtNode; def MatrixSubscriptExpr : StmtNode; -def OMPArraySectionExpr : StmtNode; +def ArraySectionExpr : StmtNode; def OMPIteratorExpr : StmtNode; def CallExpr : StmtNode; def MemberExpr : StmtNode; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 41a85e8f20defb..fc0b2ac3b5900b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2617,6 +2617,11 @@ defm protect_parens : BoolFOption<"protect-parens", "floating-point expressions are evaluated">, NegFlag>; +defm daz_ftz : SimpleMFlag<"daz-ftz", + "Globally set", "Do not globally set", + " the denormals-are-zero (DAZ) and flush-to-zero (FTZ) bits in the " + "floating-point control register 
on program startup">; + def ffor_scope : Flag<["-"], "ffor-scope">, Group; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group; diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index ea28617f79b81b..da19503c2902fd 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -193,6 +193,12 @@ class SemaOpenACC : public SemaBase { /// conversions and diagnostics to 'int'. ExprResult ActOnIntExpr(OpenACCDirectiveKind DK, OpenACCClauseKind CK, SourceLocation Loc, Expr *IntExpr); + + /// Checks and creates an Array Section used in an OpenACC construct/clause. + ExprResult ActOnArraySectionExpr(Expr *Base, SourceLocation LBLoc, + Expr *LowerBound, + SourceLocation ColonLocFirst, Expr *Length, + SourceLocation RBLoc); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 186c3b722ced16..a8df5a0bda0850 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -973,8 +973,8 @@ enum PredefinedTypeIDs { /// OpenCL reserve_id type. PREDEF_TYPE_RESERVE_ID_ID = 41, - /// The placeholder type for OpenMP array section. - PREDEF_TYPE_OMP_ARRAY_SECTION = 42, + /// The placeholder type for an array section. 
+ PREDEF_TYPE_ARRAY_SECTION = 42, /// The '__float128' type PREDEF_TYPE_FLOAT128_ID = 43, @@ -1926,7 +1926,7 @@ enum StmtCode { STMT_OMP_TARGET_TEAMS_GENERIC_LOOP_DIRECTIVE, STMT_OMP_PARALLEL_GENERIC_LOOP_DIRECTIVE, STMT_OMP_TARGET_PARALLEL_GENERIC_LOOP_DIRECTIVE, - EXPR_OMP_ARRAY_SECTION, + EXPR_ARRAY_SECTION, EXPR_OMP_ARRAY_SHAPING, EXPR_OMP_ITERATOR, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 0a42eb53775b75..486d6ef620ad96 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1321,16 +1321,14 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, // Placeholder type for OMP array sections. if (LangOpts.OpenMP) { - InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection); + InitBuiltinType(ArraySectionTy, BuiltinType::ArraySection); InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping); InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator); } - // Placeholder type for OpenACC array sections. - if (LangOpts.OpenACC) { - // FIXME: Once we implement OpenACC array sections in Sema, this will either - // be combined with the OpenMP type, or given its own type. In the meantime, - // just use the OpenMP type so that parsing can work. - InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection); + // Placeholder type for OpenACC array sections, if we are ALSO in OMP mode, + // don't bother, as we're just using the same type as OMP. 
+ if (LangOpts.OpenACC && !LangOpts.OpenMP) { + InitBuiltinType(ArraySectionTy, BuiltinType::ArraySection); } if (LangOpts.MatrixTypes) InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx); diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index 5ec3013fabba9a..bad8e75b2f878c 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -443,12 +443,17 @@ ExprDependence clang::computeDependence(ObjCIndirectCopyRestoreExpr *E) { return E->getSubExpr()->getDependence(); } -ExprDependence clang::computeDependence(OMPArraySectionExpr *E) { +ExprDependence clang::computeDependence(ArraySectionExpr *E) { auto D = E->getBase()->getDependence(); if (auto *LB = E->getLowerBound()) D |= LB->getDependence(); if (auto *Len = E->getLength()) D |= Len->getDependence(); + + if (E->isOMPArraySection()) { + if (auto *Stride = E->getStride()) + D |= Stride->getDependence(); + } return D; } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 9eec7edc9d1a3e..63dcdb919c7117 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -3680,7 +3680,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case ParenExprClass: case ArraySubscriptExprClass: case MatrixSubscriptExprClass: - case OMPArraySectionExprClass: + case ArraySectionExprClass: case OMPArrayShapingExprClass: case OMPIteratorExprClass: case MemberExprClass: @@ -5060,9 +5060,9 @@ QualType AtomicExpr::getValueType() const { return T; } -QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) { +QualType ArraySectionExpr::getBaseOriginalType(const Expr *Base) { unsigned ArraySectionCount = 0; - while (auto *OASE = dyn_cast(Base->IgnoreParens())) { + while (auto *OASE = dyn_cast(Base->IgnoreParens())) { Base = OASE->getBase(); ++ArraySectionCount; } diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index 7026fca8554ce9..2bb8f9aeedc7e2 100644 --- 
a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -145,7 +145,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::FunctionParmPackExprClass: case Expr::MSPropertyRefExprClass: case Expr::MSPropertySubscriptExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: return Cl::CL_LValue; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index de3c2a63913e94..ea3e7304a7423c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16130,7 +16130,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { case Expr::StringLiteralClass: case Expr::ArraySubscriptExprClass: case Expr::MatrixSubscriptExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::MemberExprClass: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 106c69dd5beed7..ed9e6eeb36c75d 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -4715,7 +4715,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, case Expr::MSPropertySubscriptExprClass: case Expr::TypoExprClass: // This should no longer exist in the AST by now. 
case Expr::RecoveryExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::CXXInheritedCtorInitExprClass: diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index ecc56c13fb7573..6f586173edb021 100644 --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -462,7 +462,7 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { case BuiltinType::PseudoObject: case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: case BuiltinType::BFloat16: diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 5855ab3141edcc..f010d36513a49e 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1521,7 +1521,7 @@ void StmtPrinter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *Node) { OS << "]"; } -void StmtPrinter::VisitOMPArraySectionExpr(OMPArraySectionExpr *Node) { +void StmtPrinter::VisitArraySectionExpr(ArraySectionExpr *Node) { PrintExpr(Node->getBase()); OS << "["; if (Node->getLowerBound()) @@ -1531,7 +1531,7 @@ void StmtPrinter::VisitOMPArraySectionExpr(OMPArraySectionExpr *Node) { if (Node->getLength()) PrintExpr(Node->getLength()); } - if (Node->getColonLocSecond().isValid()) { + if (Node->isOMPArraySection() && Node->getColonLocSecond().isValid()) { OS << ":"; if (Node->getStride()) PrintExpr(Node->getStride()); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c81724f84dd9ce..a95f5c6103e24d 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1435,7 +1435,7 @@ void StmtProfiler::VisitMatrixSubscriptExpr(const MatrixSubscriptExpr *S) { VisitExpr(S); } -void StmtProfiler::VisitOMPArraySectionExpr(const OMPArraySectionExpr *S) { +void StmtProfiler::VisitArraySectionExpr(const 
ArraySectionExpr *S) { VisitExpr(S); } diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index cb22c91a12aa89..8aaa6801d85b8b 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3413,8 +3413,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { return "reserve_id_t"; case IncompleteMatrixIdx: return ""; - case OMPArraySection: - return ""; + case ArraySection: + return ""; case OMPArrayShaping: return ""; case OMPIterator: @@ -4710,7 +4710,7 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case BuiltinType::BuiltinFn: case BuiltinType::NullPtr: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return false; diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 21e152f6aea8a0..ce45b47d5cfea5 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -429,7 +429,7 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { #include "clang/Basic/WebAssemblyReferenceTypes.def" case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return TST_unspecified; diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index cd520a6375e07e..143c04309d0753 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -39,12 +39,6 @@ using namespace clang; #define DEBUG_TYPE "file-search" -ALWAYS_ENABLED_STATISTIC(NumDirLookups, "Number of directory lookups."); -ALWAYS_ENABLED_STATISTIC(NumFileLookups, "Number of file lookups."); -ALWAYS_ENABLED_STATISTIC(NumDirCacheMisses, - "Number of directory cache misses."); -ALWAYS_ENABLED_STATISTIC(NumFileCacheMisses, "Number of file cache misses."); - 
//===----------------------------------------------------------------------===// // Common logic. //===----------------------------------------------------------------------===// @@ -656,6 +650,14 @@ StringRef FileManager::getCanonicalName(const void *Entry, StringRef Name) { return CanonicalName; } +void FileManager::AddStats(const FileManager &Other) { + assert(&Other != this && "Collecting stats into the same FileManager"); + NumDirLookups += Other.NumDirLookups; + NumFileLookups += Other.NumFileLookups; + NumDirCacheMisses += Other.NumDirCacheMisses; + NumFileCacheMisses += Other.NumFileCacheMisses; +} + void FileManager::PrintStats() const { llvm::errs() << "\n*** File Manager Stats:\n"; llvm::errs() << UniqueRealFiles.size() << " real files found, " diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index dc6748d587d2ce..cbe7854c94ff51 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3628,7 +3628,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // frexpl instead of legalizing this type in the BE. 
if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case Builtin::BI__builtin_frexp: case Builtin::BI__builtin_frexpf: @@ -5366,7 +5366,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ptrauth_auth_and_resign: if (Args[4]->getType()->isPointerTy()) Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy); - LLVM_FALLTHROUGH; + [[fallthrough]]; case Builtin::BI__builtin_ptrauth_auth: case Builtin::BI__builtin_ptrauth_sign_unauthenticated: @@ -18858,7 +18858,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: AppendFalseForOpselArg = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB @@ -18867,7 +18867,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: AppendFalseForOpselArg = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b3e7aff3d96924..2ac5e0a0b95a05 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1586,6 +1586,11 @@ bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) { return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet()); } +bool CodeGenModule::ReturnTypeHasInReg(const CGFunctionInfo &FI) { + const auto &RI = FI.getReturnInfo(); + return RI.getInReg(); +} + bool 
CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) { return ReturnTypeUsesSRet(FI) && getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs(); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 64d868568e7977..799ad7081b6e1d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1621,8 +1621,8 @@ LValue CodeGenFunction::EmitLValueHelper(const Expr *E, return EmitArraySubscriptExpr(cast(E)); case Expr::MatrixSubscriptExprClass: return EmitMatrixSubscriptExpr(cast(E)); - case Expr::OMPArraySectionExprClass: - return EmitOMPArraySectionExpr(cast(E)); + case Expr::ArraySectionExprClass: + return EmitArraySectionExpr(cast(E)); case Expr::ExtVectorElementExprClass: return EmitExtVectorElementExpr(cast(E)); case Expr::CXXThisExprClass: @@ -4363,8 +4363,8 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, QualType BaseTy, QualType ElTy, bool IsLowerBound) { LValue BaseLVal; - if (auto *ASE = dyn_cast(Base->IgnoreParenImpCasts())) { - BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); + if (auto *ASE = dyn_cast(Base->IgnoreParenImpCasts())) { + BaseLVal = CGF.EmitArraySectionExpr(ASE, IsLowerBound); if (BaseTy->isArrayType()) { Address Addr = BaseLVal.getAddress(CGF); BaseInfo = BaseLVal.getBaseInfo(); @@ -4396,9 +4396,13 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } -LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, - bool IsLowerBound) { - QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(E->getBase()); +LValue CodeGenFunction::EmitArraySectionExpr(const ArraySectionExpr *E, + bool IsLowerBound) { + + assert(!E->isOpenACCArraySection() && + "OpenACC Array section codegen not implemented"); + + QualType BaseTy = ArraySectionExpr::getBaseOriginalType(E->getBase()); QualType ResultExprTy; if (auto *AT = 
getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 4e7f777ba1d916..43dd38659518d1 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -2905,23 +2905,29 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, break; case CodeGenOptions::Mixed: case CodeGenOptions::NonLegacy: + StringRef name = "objc_msgSend"; if (CGM.ReturnTypeUsesFPRet(ResultType)) { - imp = - CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_fpret") - .getCallee(); + name = "objc_msgSend_fpret"; } else if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) { - // The actual types here don't matter - we're going to bitcast the - // function anyway - imp = - CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_stret") - .getCallee(); - } else { - imp = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IdTy, IdTy, true), "objc_msgSend") - .getCallee(); + name = "objc_msgSend_stret"; + + // The address of the memory block is be passed in x8 for POD type, + // or in x0 for non-POD type (marked as inreg). 
+ bool shouldCheckForInReg = + CGM.getContext() + .getTargetInfo() + .getTriple() + .isWindowsMSVCEnvironment() && + CGM.getContext().getTargetInfo().getTriple().isAArch64(); + if (shouldCheckForInReg && CGM.ReturnTypeHasInReg(MSI.CallInfo)) { + name = "objc_msgSend_stret2"; + } } + // The actual types here don't matter - we're going to bitcast the + // function anyway + imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), + name) + .getCallee(); } // Reset the receiver in case the lookup modified it diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index bb43af0276d183..19c2ca99181fb7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -763,8 +763,8 @@ LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E) { - if (const auto *OASE = dyn_cast(E)) - return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + if (const auto *OASE = dyn_cast(E)) + return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false); return LValue(); } @@ -821,7 +821,7 @@ void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { QualType PrivateType = getPrivateType(N); - bool AsArraySection = isa(ClausesData[N].Ref); + bool AsArraySection = isa(ClausesData[N].Ref); if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), @@ -962,9 +962,9 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { const VarDecl *OrigVD = nullptr; - if (const auto *OASE = dyn_cast(Ref)) { + if (const auto *OASE = dyn_cast(Ref)) { const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (const auto 
*TempOASE = dyn_cast(Base)) + while (const auto *TempOASE = dyn_cast(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); @@ -3591,9 +3591,8 @@ getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); } } else if (const auto *ASE = - dyn_cast(E->IgnoreParenImpCasts())) { - LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); + dyn_cast(E->IgnoreParenImpCasts())) { + LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false); Address UpAddrAddress = UpAddrLVal.getAddress(CGF); llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF), @@ -6693,8 +6692,8 @@ class MappableExprsHandler { // Given that an array section is considered a built-in type, we need to // do the calculation based on the length of the section instead of relying // on CGF.getTypeSize(E->getType()). - if (const auto *OAE = dyn_cast(E)) { - QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( + if (const auto *OAE = dyn_cast(E)) { + QualType BaseTy = ArraySectionExpr::getBaseOriginalType( OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); @@ -6800,7 +6799,7 @@ class MappableExprsHandler { /// Return true if the provided expression is a final array section. A /// final array section, is one whose length can't be proved to be one. bool isFinalArraySectionExpression(const Expr *E) const { - const auto *OASE = dyn_cast(E); + const auto *OASE = dyn_cast(E); // It is not an array section and therefore not a unity-size one. if (!OASE) @@ -6816,7 +6815,7 @@ class MappableExprsHandler { // for this dimension. Also, we should always expect a length if the // base type is pointer. 
if (!Length) { - QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + QualType BaseQTy = ArraySectionExpr::getBaseOriginalType( OASE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); if (const auto *ATy = dyn_cast(BaseQTy.getTypePtr())) @@ -7048,7 +7047,7 @@ class MappableExprsHandler { Address BP = Address::invalid(); const Expr *AssocExpr = I->getAssociatedExpression(); const auto *AE = dyn_cast(AssocExpr); - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); const auto *OAShE = dyn_cast(AssocExpr); if (isa(AssocExpr)) { @@ -7200,14 +7199,14 @@ class MappableExprsHandler { // special treatment for array sections given that they are built-in // types. const auto *OASE = - dyn_cast(I->getAssociatedExpression()); + dyn_cast(I->getAssociatedExpression()); const auto *OAShE = dyn_cast(I->getAssociatedExpression()); const auto *UO = dyn_cast(I->getAssociatedExpression()); const auto *BO = dyn_cast(I->getAssociatedExpression()); bool IsPointer = OAShE || - (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) + (OASE && ArraySectionExpr::getBaseOriginalType(OASE) .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); @@ -7228,7 +7227,7 @@ class MappableExprsHandler { assert((Next == CE || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression())) && @@ -7460,7 +7459,7 @@ class MappableExprsHandler { PartialStruct.LowestElem = {FieldIndex, LowestElem}; if (IsFinalArraySection) { Address HB = - CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) .getAddress(CGF); PartialStruct.HighestElem = {FieldIndex, HB}; } else { @@ -7473,7 +7472,7 @@ class MappableExprsHandler { } else if (FieldIndex > 
PartialStruct.HighestElem.first) { if (IsFinalArraySection) { Address HB = - CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) .getAddress(CGF); PartialStruct.HighestElem = {FieldIndex, HB}; } else { @@ -7531,12 +7530,12 @@ class MappableExprsHandler { for (const OMPClauseMappableExprCommon::MappableComponent &Component : Components) { const Expr *AssocExpr = Component.getAssociatedExpression(); - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); if (!OASE) continue; - QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase()); auto *CAT = Context.getAsConstantArrayType(Ty); auto *VAT = Context.getAsVariableArrayType(Ty); @@ -7610,7 +7609,7 @@ class MappableExprsHandler { continue; } - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); if (!OASE) continue; @@ -8801,7 +8800,7 @@ static ValueDecl *getDeclFromThisExpr(const Expr *E) { if (!E) return nullptr; - if (const auto *OASE = dyn_cast(E->IgnoreParenCasts())) + if (const auto *OASE = dyn_cast(E->IgnoreParenCasts())) if (const MemberExpr *ME = dyn_cast(OASE->getBase()->IgnoreParenImpCasts())) return ME->getMemberDecl(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index c00f96e22181f0..6407682d050eda 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -97,9 +97,9 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) { while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); RefExpr = Base; - } else if (auto *OASE = dyn_cast(RefExpr)) { + } else if (auto *OASE = dyn_cast(RefExpr)) { const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (const auto *TempOASE = dyn_cast(Base)) + while (const auto *TempOASE = dyn_cast(Base)) Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c3843013b0a68c..c333981daccd53 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1442,7 +1442,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( const auto *LHSVD = cast(cast(*ILHS)->getDecl()); const auto *RHSVD = cast(cast(*IRHS)->getDecl()); QualType Type = PrivateVD->getType(); - bool isaOMPArraySectionExpr = isa(IRef); + bool isaOMPArraySectionExpr = isa(IRef); if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. @@ -7630,7 +7630,7 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( static const VarDecl *getBaseDecl(const Expr *Ref) { const Expr *Base = Ref->IgnoreParenImpCasts(); - while (const auto *OASE = dyn_cast(Base)) + while (const auto *OASE = dyn_cast(Base)) Base = OASE->getBase()->IgnoreParenImpCasts(); while (const auto *ASE = dyn_cast(Base)) Base = ASE->getBase()->IgnoreParenImpCasts(); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 77069d7a5b180b..2c21d080313da3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4228,8 +4228,8 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E, bool Accessed = false); LValue EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E); - LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, - bool IsLowerBound = true); + LValue EmitArraySectionExpr(const ArraySectionExpr *E, + bool IsLowerBound = true); LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E); LValue EmitMemberExpr(const MemberExpr *E); LValue EmitObjCIsaExpr(const ObjCIsaExpr *E); diff --git 
a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index cf0796017d0f5e..7a458ac1382d6e 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1356,6 +1356,9 @@ class CodeGenModule : public CodeGenTypeCache { /// Return true iff the given type uses 'sret' when used as a return type. bool ReturnTypeUsesSRet(const CGFunctionInfo &FI); + /// Return true iff the given type has `inreg` set. + bool ReturnTypeHasInReg(const CGFunctionInfo &FI); + /// Return true iff the given type uses an argument slot when 'sret' is used /// as a return type. bool ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 1568b6e6275b9d..e8d75eda029e66 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -409,7 +409,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; case BuiltinType::LongDouble: LongDoubleReferenced = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case BuiltinType::BFloat16: case BuiltinType::Float: case BuiltinType::Double: diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index c1b9e60201b072..6af814acc4433d 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1323,9 +1323,14 @@ void ToolChain::AddCCKextLibArgs(const ArgList &Args, bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, std::string &Path) const { + // Don't implicitly link in mode-changing libraries in a shared library, since + // this can have very deleterious effects. See the various links from + // https://github.com/llvm/llvm-project/issues/57589 for more information. + bool Default = !Args.hasArgNoClaim(options::OPT_shared); + // Do not check for -fno-fast-math or -fno-unsafe-math when -Ofast passed // (to keep the linker options consistent with gcc and clang itself). 
- if (!isOptimizationLevelFast(Args)) { + if (Default && !isOptimizationLevelFast(Args)) { // Check if -ffast-math or -funsafe-math. Arg *A = Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math, @@ -1334,8 +1339,14 @@ bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, if (!A || A->getOption().getID() == options::OPT_fno_fast_math || A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations) - return false; + Default = false; } + + // Whatever decision came as a result of the above implicit settings, either + // -mdaz-ftz or -mno-daz-ftz is capable of overriding it. + if (!Args.hasFlag(options::OPT_mdaz_ftz, options::OPT_mno_daz_ftz, Default)) + return false; + // If crtfastmath.o exists add it to the arguments. Path = GetFilePath("crtfastmath.o"); return (Path != "crtfastmath.o"); // Not found. diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index c1b350893b3744..aab98506adb96f 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -376,9 +376,7 @@ void AIX::AddOpenMPIncludeArgs(const ArgList &DriverArgs, addSystemInclude(DriverArgs, CC1Args, PathOpenMP.str()); break; case Driver::OMPRT_IOMP5: - LLVM_FALLTHROUGH; case Driver::OMPRT_GOMP: - LLVM_FALLTHROUGH; case Driver::OMPRT_Unknown: // Unknown / unsupported include paths. break; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 79a118030e7c91..849fd1e21ddd64 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -863,46 +863,6 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, } } -/// Check whether the given input tree contains any compilation actions. -static bool ContainsCompileAction(const Action *A) { - if (isa(A) || isa(A)) - return true; - - return llvm::any_of(A->inputs(), ContainsCompileAction); -} - -/// Check if -relax-all should be passed to the internal assembler. 
-/// This is done by default when compiling non-assembler source with -O0. -static bool UseRelaxAll(Compilation &C, const ArgList &Args) { - bool RelaxDefault = true; - - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) - RelaxDefault = A->getOption().matches(options::OPT_O0); - - // RISC-V requires an indirect jump for offsets larger than 1MiB. This cannot - // be done by assembler branch relaxation as it needs a free temporary - // register. Because of this, branch relaxation is handled by a MachineIR - // pass before the assembler. Forcing assembler branch relaxation for -O0 - // makes the MachineIR branch relaxation inaccurate and it will miss cases - // where an indirect branch is necessary. To avoid this issue we are - // sacrificing the compile time improvement of using -mrelax-all for -O0. - if (C.getDefaultToolChain().getTriple().isRISCV()) - RelaxDefault = false; - - if (RelaxDefault) { - RelaxDefault = false; - for (const auto &Act : C.getActions()) { - if (ContainsCompileAction(Act)) { - RelaxDefault = true; - break; - } - } - } - - return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all, - RelaxDefault); -} - static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs, llvm::codegenoptions::DebugInfoKind DebugInfoKind, @@ -2514,8 +2474,16 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, const Driver &D) { - if (UseRelaxAll(C, Args)) - CmdArgs.push_back("-mrelax-all"); + // Default to -mno-relax-all. + // + // Note: RISC-V requires an indirect jump for offsets larger than 1MiB. This + // cannot be done by assembler branch relaxation as it needs a free temporary + // register. Because of this, branch relaxation is handled by a MachineIR pass + // before the assembler. Forcing assembler branch relaxation for -O0 makes the + // MachineIR branch relaxation inaccurate and it will miss cases where an + // indirect branch is necessary. 
+ Args.addOptInFlag(CmdArgs, options::OPT_mrelax_all, + options::OPT_mno_relax_all); // Only default to -mincremental-linker-compatible if we think we are // targeting the MSVC linker. diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index f2a181e51bcf26..c32cf27cb1fcd8 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -1076,25 +1076,6 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, ToolChain::addProfileRTLibs(Args, CmdArgs); } -llvm::DenormalMode -Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs, - const JobAction &JA, - const llvm::fltSemantics *FPType) const { - switch (getTriple().getArch()) { - case llvm::Triple::x86: - case llvm::Triple::x86_64: { - std::string Unused; - // DAZ and FTZ are turned on in crtfastmath.o - if (!DriverArgs.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) && - isFastMathRuntimeAvailable(DriverArgs, Unused)) - return llvm::DenormalMode::getPreserveSign(); - return llvm::DenormalMode::getIEEE(); - } - default: - return llvm::DenormalMode::getIEEE(); - } -} - void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { for (const auto &Opt : ExtraOpts) CmdArgs.push_back(Opt.c_str()); diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h index 6e5f7fb1e918d8..0cf4104814b5fe 100644 --- a/clang/lib/Driver/ToolChains/Linux.h +++ b/clang/lib/Driver/ToolChains/Linux.h @@ -65,10 +65,6 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF { std::vector ExtraOpts; - llvm::DenormalMode getDefaultDenormalModeForType( - const llvm::opt::ArgList &DriverArgs, const JobAction &JA, - const llvm::fltSemantics *FPType = nullptr) const override; - const char *getDefaultLinker() const override; protected: diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index ccb2c9190e2eff..c8d8ec3afbd990 100644 --- a/clang/lib/Format/Format.cpp +++ 
b/clang/lib/Format/Format.cpp @@ -807,7 +807,6 @@ template <> struct MappingTraits { FormatStyle PredefinedStyle; if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && Style == PredefinedStyle) { - IO.mapOptional("# BasedOnStyle", StyleName); BasedOnStyle = StyleName; break; } @@ -3117,6 +3116,7 @@ static void sortCppIncludes(const FormatStyle &Style, return; } + const auto OldCursor = Cursor ? *Cursor : 0; std::string result; for (unsigned Index : Indices) { if (!result.empty()) { @@ -3140,6 +3140,8 @@ static void sortCppIncludes(const FormatStyle &Style, // the entire range of blocks. Otherwise, no replacement is generated. if (replaceCRLF(result) == replaceCRLF(std::string(Code.substr( IncludesBeginOffset, IncludesBlockSize)))) { + if (Cursor) + *Cursor = OldCursor; return; } diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 6e3baf83864415..66a45b888f15cc 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1293,6 +1293,10 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, diag::remark_module_build_done) << ModuleName; + // Propagate the statistics to the parent FileManager. + if (!FrontendOpts.ModulesShareFileManager) + ImportingInstance.getFileManager().AddStats(Instance.getFileManager()); + if (Crashed) { // Clear the ASTConsumer if it hasn't been already, in case it owns streams // that must be closed before clearing output files. diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index a2af738a053e5b..9ae7664b4b49d4 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -757,8 +757,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, // IR files bypass the rest of initialization. 
if (Input.getKind().getLanguage() == Language::LLVM_IR) { - assert(hasIRSupport() && - "This action does not have IR file support!"); + if (!hasIRSupport()) { + CI.getDiagnostics().Report(diag::err_ast_action_on_llvm_ir) + << Input.getFile(); + return false; + } // Inform the diagnostic client we are processing a source file. CI.getDiagnosticClient().BeginSourceFile(CI.getLangOpts(), nullptr); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 4f44c3b7b89d4d..6bdd734e8a2752 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -389,8 +389,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Twine((unsigned)LangOpts.getHLSLVersion())); if (LangOpts.NativeHalfType) - Builder.defineMacro("__HLSL_ENABLE_16_BIT", - Twine((unsigned)LangOpts.getHLSLVersion())); + Builder.defineMacro("__HLSL_ENABLE_16_BIT", "1"); // Shader target information // "enums" for shader stages diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 32d96f81c4c8de..7d6febb04a82c4 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -31,6 +31,7 @@ #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/SemaCUDA.h" +#include "clang/Sema/SemaOpenACC.h" #include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TypoCorrection.h" @@ -2070,15 +2071,22 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (!LHS.isInvalid() && !HasError && !Length.isInvalid() && !Stride.isInvalid() && Tok.is(tok::r_square)) { if (ColonLocFirst.isValid() || ColonLocSecond.isValid()) { - // FIXME: OpenACC hasn't implemented Sema/Array section handling at a - // semantic level yet. For now, just reuse the OpenMP implementation - // as it gets the parsing/type management mostly right, and we can - // replace this call to ActOnOpenACCArraySectionExpr in the future. 
- // Eventually we'll genericize the OPenMPArraySectionExpr type as - // well. - LHS = Actions.OpenMP().ActOnOMPArraySectionExpr( - LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], - ColonLocFirst, ColonLocSecond, Length.get(), Stride.get(), RLoc); + // Like above, AllowOpenACCArraySections is 'more specific' and only + // enabled when actively parsing a 'var' in a 'var-list' during + // clause/'cache' construct parsing, so it is more specific. So we + // should do it first, so that the correct node gets created. + if (AllowOpenACCArraySections) { + assert(!Stride.isUsable() && !ColonLocSecond.isValid() && + "Stride/second colon not allowed for OpenACC"); + LHS = Actions.OpenACC().ActOnArraySectionExpr( + LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], + ColonLocFirst, Length.get(), RLoc); + } else { + LHS = Actions.OpenMP().ActOnOMPArraySectionExpr( + LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], + ColonLocFirst, ColonLocSecond, Length.get(), Stride.get(), + RLoc); + } } else { LHS = Actions.ActOnArraySubscriptExpr(getCurScope(), LHS.get(), Loc, ArgExprs, RLoc); diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 8a18fca8064ee1..29326f5d993a9d 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -327,7 +327,7 @@ OpenACCReductionOperator ParseReductionOperator(Parser &P) { return OpenACCReductionOperator::Max; if (ReductionKindTok.getIdentifierInfo()->isStr("min")) return OpenACCReductionOperator::Min; - LLVM_FALLTHROUGH; + [[fallthrough]]; default: P.Diag(ReductionKindTok, diag::err_acc_invalid_reduction_operator); return OpenACCReductionOperator::Invalid; @@ -945,7 +945,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( // the 'update' clause, so we have to handle it here. U se an assert to // make sure we get the right differentiator. 
assert(DirKind == OpenACCDirectiveKind::Update); - LLVM_FALLTHROUGH; + [[fallthrough]]; case OpenACCClauseKind::Attach: case OpenACCClauseKind::Copy: case OpenACCClauseKind::Delete: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7ef918970196e2..05e4c527f0b6e4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -18723,8 +18723,10 @@ void Sema::CheckArrayAccess(const Expr *expr) { expr = cast(expr)->getBase(); break; } - case Stmt::OMPArraySectionExprClass: { - const OMPArraySectionExpr *ASE = cast(expr); + case Stmt::ArraySectionExprClass: { + const ArraySectionExpr *ASE = cast(expr); + // FIXME: We should probably be checking all of the elements to the + // 'length' here as well. if (ASE->getLowerBound()) CheckArrayAccess(ASE->getBase(), ASE->getLowerBound(), /*ASE=*/nullptr, AllowOnePastEnd > 0); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 00384f9dc16aa0..c9dd6bb2413e38 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1314,7 +1314,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { // Some might be dependent for other reasons. 
case Expr::ArraySubscriptExprClass: case Expr::MatrixSubscriptExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::BinaryOperatorClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 8d5b4199a681b9..71bb8f833d8997 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5069,11 +5069,18 @@ ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation rbLoc) { if (base && !base->getType().isNull() && - base->hasPlaceholderType(BuiltinType::OMPArraySection)) - return OpenMP().ActOnOMPArraySectionExpr(base, lbLoc, ArgExprs.front(), - SourceLocation(), SourceLocation(), - /*Length*/ nullptr, - /*Stride=*/nullptr, rbLoc); + base->hasPlaceholderType(BuiltinType::ArraySection)) { + auto *AS = cast(base); + if (AS->isOMPArraySection()) + return OpenMP().ActOnOMPArraySectionExpr( + base, lbLoc, ArgExprs.front(), SourceLocation(), SourceLocation(), + /*Length*/ nullptr, + /*Stride=*/nullptr, rbLoc); + + return OpenACC().ActOnArraySectionExpr(base, lbLoc, ArgExprs.front(), + SourceLocation(), /*Length*/ nullptr, + rbLoc); + } // Since this might be a postfix expression, get rid of ParenListExprs. if (isa(base)) { @@ -6361,7 +6368,7 @@ static bool isPlaceholderToRemoveAsArg(QualType type) { case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return true; @@ -21343,8 +21350,9 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) { return ExprError(); // Expressions of unknown type. 
- case BuiltinType::OMPArraySection: - Diag(E->getBeginLoc(), diag::err_omp_array_section_use); + case BuiltinType::ArraySection: + Diag(E->getBeginLoc(), diag::err_array_section_use) + << cast(E)->isOMPArraySection(); return ExprError(); // Expressions of unknown type. diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 793e16df178914..003a157990d307 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -7753,9 +7753,9 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, break; } - case Stmt::OMPArraySectionExprClass: { + case Stmt::ArraySectionExprClass: { visitLocalsRetainedByInitializer(Path, - cast(Init)->getBase(), + cast(Init)->getBase(), Visit, true, EnableLifetimeWarnings); break; } diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index ba69e71e30a181..d5cfe82a5d7098 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -423,6 +423,21 @@ ExprResult SemaOpenACC::ActOnIntExpr(OpenACCDirectiveKind DK, return IntExpr; } +ExprResult SemaOpenACC::ActOnArraySectionExpr(Expr *Base, SourceLocation LBLoc, + Expr *LowerBound, + SourceLocation ColonLoc, + Expr *Length, + SourceLocation RBLoc) { + ASTContext &Context = getASTContext(); + + // TODO OpenACC: We likely have to reproduce a lot of the same logic from the + // OMP version of this, but at the moment we don't have a good way to test it, + // so for now we'll just create the node. 
+ return new (Context) + ArraySectionExpr(Base, LowerBound, Length, Context.ArraySectionTy, + VK_LValue, OK_Ordinary, ColonLoc, RBLoc); +} + bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc) { return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 4356d81bb9d7d4..7db09cdee9ef49 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2230,7 +2230,7 @@ bool SemaOpenMP::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, dyn_cast(Last->getAssociatedExpression()); if ((UO && UO->getOpcode() == UO_Deref) || isa(Last->getAssociatedExpression()) || - isa(Last->getAssociatedExpression()) || + isa(Last->getAssociatedExpression()) || isa(EI->getAssociatedExpression()) || isa(Last->getAssociatedExpression())) { IsVariableAssociatedWithSection = true; @@ -3884,7 +3884,7 @@ class DSAAttrChecker final : public StmtVisitor { MappableComponent &MC) { return MC.getAssociatedDeclaration() == nullptr && - (isa( + (isa( MC.getAssociatedExpression()) || isa( MC.getAssociatedExpression()) || @@ -4062,7 +4062,7 @@ class DSAAttrChecker final : public StmtVisitor { // Do both expressions have the same kind? 
if (CCI->getAssociatedExpression()->getStmtClass() != SC.getAssociatedExpression()->getStmtClass()) - if (!((isa( + if (!((isa( SC.getAssociatedExpression()) || isa( SC.getAssociatedExpression())) && @@ -5428,9 +5428,9 @@ static std::pair getPrivateItem(Sema &S, Expr *&RefExpr, Base = TempASE->getBase()->IgnoreParenImpCasts(); RefExpr = Base; IsArrayExpr = ArraySubscript; - } else if (auto *OASE = dyn_cast_or_null(RefExpr)) { + } else if (auto *OASE = dyn_cast_or_null(RefExpr)) { Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast(Base)) + while (auto *TempOASE = dyn_cast(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); while (auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); @@ -6060,10 +6060,10 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, // Array section - need to check for the mapping of the array section // element. QualType CanonType = E->getType().getCanonicalType(); - if (CanonType->isSpecificBuiltinType(BuiltinType::OMPArraySection)) { - const auto *OASE = cast(E->IgnoreParenImpCasts()); + if (CanonType->isSpecificBuiltinType(BuiltinType::ArraySection)) { + const auto *OASE = cast(E->IgnoreParenImpCasts()); QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); QualType ElemType; if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) ElemType = ATy->getElementType(); @@ -19534,7 +19534,7 @@ struct ReductionData { } // namespace static bool checkOMPArraySectionConstantForReduction( - ASTContext &Context, const OMPArraySectionExpr *OASE, bool &SingleElement, + ASTContext &Context, const ArraySectionExpr *OASE, bool &SingleElement, SmallVectorImpl &ArraySizes) { const Expr *Length = OASE->getLength(); if (Length == nullptr) { @@ -19561,7 +19561,7 @@ static bool checkOMPArraySectionConstantForReduction( // We require length = 1 for all array sections except the right-most to // 
guarantee that the memory region is contiguous and has no holes in it. - while (const auto *TempOASE = dyn_cast(Base)) { + while (const auto *TempOASE = dyn_cast(Base)) { Length = TempOASE->getLength(); if (Length == nullptr) { // For array sections of the form [1:] or [:], we would need to analyze @@ -19766,12 +19766,12 @@ static bool actOnOMPReductionKindClause( Expr *TaskgroupDescriptor = nullptr; QualType Type; auto *ASE = dyn_cast(RefExpr->IgnoreParens()); - auto *OASE = dyn_cast(RefExpr->IgnoreParens()); + auto *OASE = dyn_cast(RefExpr->IgnoreParens()); if (ASE) { Type = ASE->getType().getNonReferenceType(); } else if (OASE) { QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) Type = ATy->getElementType(); else @@ -21305,10 +21305,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPDependClause( // List items used in depend clauses cannot be zero-length array // sections. QualType ExprTy = RefExpr->getType().getNonReferenceType(); - const auto *OASE = dyn_cast(SimpleExpr); + const auto *OASE = dyn_cast(SimpleExpr); if (OASE) { QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (BaseType.isNull()) return nullptr; if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) @@ -21367,7 +21367,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPDependClause( Res = SemaRef.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RefExpr->IgnoreParenImpCasts()); } - if (!Res.isUsable() && !isa(SimpleExpr) && + if (!Res.isUsable() && !isa(SimpleExpr) && !isa(SimpleExpr)) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) << (getLangOpts().OpenMP >= 50 ? 
1 : 0) @@ -21468,7 +21468,7 @@ static bool checkTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef, static bool checkArrayExpressionDoesNotReferToWholeSize(Sema &SemaRef, const Expr *E, QualType BaseQTy) { - const auto *OASE = dyn_cast(E); + const auto *OASE = dyn_cast(E); // If this is an array subscript, it refers to the whole size if the size of // the dimension is constant and equals 1. Also, an array section assumes the @@ -21526,7 +21526,7 @@ static bool checkArrayExpressionDoesNotReferToWholeSize(Sema &SemaRef, static bool checkArrayExpressionDoesNotReferToUnitySize(Sema &SemaRef, const Expr *E, QualType BaseQTy) { - const auto *OASE = dyn_cast(E); + const auto *OASE = dyn_cast(E); // An array subscript always refer to a single element. Also, an array section // assumes the format of an array subscript if no colon is used. @@ -21741,14 +21741,14 @@ class MapBaseChecker final : public StmtVisitor { return RelevantExpr || Visit(E); } - bool VisitOMPArraySectionExpr(OMPArraySectionExpr *OASE) { + bool VisitArraySectionExpr(ArraySectionExpr *OASE) { // After OMP 5.0 Array section in reduction clause will be implicitly // mapped assert(!(SemaRef.getLangOpts().OpenMP < 50 && NoDiagnose) && "Array sections cannot be implicitly mapped."); Expr *E = OASE->getBase()->IgnoreParenImpCasts(); QualType CurType = - OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); + ArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1] // If the type of a list item is a reference to a type T then the type @@ -21921,7 +21921,7 @@ static const Expr *checkMapClauseExpressionBase( auto CE = CurComponents.rend(); for (; CI != CE; ++CI) { const auto *OASE = - dyn_cast(CI->getAssociatedExpression()); + dyn_cast(CI->getAssociatedExpression()); if (!OASE) continue; if (OASE && OASE->getLength()) @@ -21991,10 +21991,10 @@ static bool checkMapConflicts( // variable in map clauses of the same construct. 
if (CurrentRegionOnly && (isa(CI->getAssociatedExpression()) || - isa(CI->getAssociatedExpression()) || + isa(CI->getAssociatedExpression()) || isa(CI->getAssociatedExpression())) && (isa(SI->getAssociatedExpression()) || - isa(SI->getAssociatedExpression()) || + isa(SI->getAssociatedExpression()) || isa(SI->getAssociatedExpression()))) { SemaRef.Diag(CI->getAssociatedExpression()->getExprLoc(), diag::err_omp_multiple_array_items_in_map_clause) @@ -22022,11 +22022,10 @@ static bool checkMapConflicts( if (const auto *ASE = dyn_cast(SI->getAssociatedExpression())) { Type = ASE->getBase()->IgnoreParenImpCasts()->getType(); - } else if (const auto *OASE = dyn_cast( + } else if (const auto *OASE = dyn_cast( SI->getAssociatedExpression())) { const Expr *E = OASE->getBase()->IgnoreParenImpCasts(); - Type = - OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); + Type = ArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); } else if (const auto *OASE = dyn_cast( SI->getAssociatedExpression())) { Type = OASE->getBase()->getType()->getPointeeType(); @@ -22501,13 +22500,13 @@ static void checkMappableExpressionList( (void)I; QualType Type; auto *ASE = dyn_cast(VE->IgnoreParens()); - auto *OASE = dyn_cast(VE->IgnoreParens()); + auto *OASE = dyn_cast(VE->IgnoreParens()); auto *OAShE = dyn_cast(VE->IgnoreParens()); if (ASE) { Type = ASE->getType().getNonReferenceType(); } else if (OASE) { QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) Type = ATy->getElementType(); else @@ -23976,7 +23975,7 @@ SemaOpenMP::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, MVLI.VarBaseDeclarations.push_back(D); MVLI.VarComponents.emplace_back(); Expr *Component = SimpleRefExpr; - if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || + if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || isa(RefExpr->IgnoreParenImpCasts()))) Component = 
SemaRef.DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); @@ -24126,7 +24125,7 @@ SemaOpenMP::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, // against other clauses later on. Expr *Component = SimpleRefExpr; auto *VD = dyn_cast(D); - if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || + if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || isa(RefExpr->IgnoreParenImpCasts()))) Component = SemaRef.DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); @@ -24540,7 +24539,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPAffinityClause( Sema::TentativeAnalysisScope Trap(SemaRef); Res = SemaRef.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, SimpleExpr); } - if (!Res.isUsable() && !isa(SimpleExpr) && + if (!Res.isUsable() && !isa(SimpleExpr) && !isa(SimpleExpr)) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) << 1 << 0 << RefExpr->getSourceRange(); @@ -24653,7 +24652,7 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( Expr *Stride, SourceLocation RBLoc) { ASTContext &Context = getASTContext(); if (Base->hasPlaceholderType() && - !Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { + !Base->hasPlaceholderType(BuiltinType::ArraySection)) { ExprResult Result = SemaRef.CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); @@ -24693,13 +24692,13 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent())) || (Stride && (Stride->isTypeDependent() || Stride->isValueDependent()))) { - return new (Context) OMPArraySectionExpr( + return new (Context) ArraySectionExpr( Base, LowerBound, Length, Stride, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); } // Perform default conversions. 
- QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); + QualType OriginalTy = ArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); @@ -24822,14 +24821,14 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( } } - if (!Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { + if (!Base->hasPlaceholderType(BuiltinType::ArraySection)) { ExprResult Result = SemaRef.DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } - return new (Context) OMPArraySectionExpr( - Base, LowerBound, Length, Stride, Context.OMPArraySectionTy, VK_LValue, + return new (Context) ArraySectionExpr( + Base, LowerBound, Length, Stride, Context.ArraySectionTy, VK_LValue, OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1d30ba31e17940..f47bc219e6fa32 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -2784,15 +2784,23 @@ class TreeTransform { /// /// By default, performs semantic analysis to build the new expression. /// Subclasses may override this routine to provide different behavior. 
- ExprResult RebuildOMPArraySectionExpr(Expr *Base, SourceLocation LBracketLoc, - Expr *LowerBound, - SourceLocation ColonLocFirst, - SourceLocation ColonLocSecond, - Expr *Length, Expr *Stride, - SourceLocation RBracketLoc) { - return getSema().OpenMP().ActOnOMPArraySectionExpr( - Base, LBracketLoc, LowerBound, ColonLocFirst, ColonLocSecond, Length, - Stride, RBracketLoc); + ExprResult RebuildArraySectionExpr(bool IsOMPArraySection, Expr *Base, + SourceLocation LBracketLoc, + Expr *LowerBound, + SourceLocation ColonLocFirst, + SourceLocation ColonLocSecond, + Expr *Length, Expr *Stride, + SourceLocation RBracketLoc) { + if (IsOMPArraySection) + return getSema().OpenMP().ActOnOMPArraySectionExpr( + Base, LBracketLoc, LowerBound, ColonLocFirst, ColonLocSecond, Length, + Stride, RBracketLoc); + + assert(Stride == nullptr && !ColonLocSecond.isValid() && + "Stride/second colon not allowed for OpenACC"); + + return getSema().OpenACC().ActOnArraySectionExpr( + Base, LBracketLoc, LowerBound, ColonLocFirst, Length, RBracketLoc); } /// Build a new array shaping expression. 
@@ -11742,7 +11750,7 @@ TreeTransform::TransformMatrixSubscriptExpr(MatrixSubscriptExpr *E) { template ExprResult -TreeTransform::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) { +TreeTransform::TransformArraySectionExpr(ArraySectionExpr *E) { ExprResult Base = getDerived().TransformExpr(E->getBase()); if (Base.isInvalid()) return ExprError(); @@ -11762,20 +11770,25 @@ TreeTransform::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) { } ExprResult Stride; - if (Expr *Str = E->getStride()) { - Stride = getDerived().TransformExpr(Str); - if (Stride.isInvalid()) - return ExprError(); + if (E->isOMPArraySection()) { + if (Expr *Str = E->getStride()) { + Stride = getDerived().TransformExpr(Str); + if (Stride.isInvalid()) + return ExprError(); + } } if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() && - LowerBound.get() == E->getLowerBound() && Length.get() == E->getLength()) + LowerBound.get() == E->getLowerBound() && + Length.get() == E->getLength() && + (E->isOpenACCArraySection() || Stride.get() == E->getStride())) return E; - return getDerived().RebuildOMPArraySectionExpr( - Base.get(), E->getBase()->getEndLoc(), LowerBound.get(), - E->getColonLocFirst(), E->getColonLocSecond(), Length.get(), Stride.get(), - E->getRBracketLoc()); + return getDerived().RebuildArraySectionExpr( + E->isOMPArraySection(), Base.get(), E->getBase()->getEndLoc(), + LowerBound.get(), E->getColonLocFirst(), + E->isOMPArraySection() ? 
E->getColonLocSecond() : SourceLocation{}, + Length.get(), Stride.get(), E->getRBracketLoc()); } template diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index f8d54c0c398906..e017f5bdb48858 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -261,8 +261,8 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { case BuiltinType::IncompleteMatrixIdx: ID = PREDEF_TYPE_INCOMPLETE_MATRIX_IDX; break; - case BuiltinType::OMPArraySection: - ID = PREDEF_TYPE_OMP_ARRAY_SECTION; + case BuiltinType::ArraySection: + ID = PREDEF_TYPE_ARRAY_SECTION; break; case BuiltinType::OMPArrayShaping: ID = PREDEF_TYPE_OMP_ARRAY_SHAPING; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index c99d6ed1c36c88..0ef57a3ea804ef 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7385,11 +7385,11 @@ QualType ASTReader::GetType(TypeID ID) { case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX: T = Context.IncompleteMatrixIdxTy; break; - case PREDEF_TYPE_OMP_ARRAY_SECTION: - T = Context.OMPArraySectionTy; + case PREDEF_TYPE_ARRAY_SECTION: + T = Context.ArraySectionTy; break; case PREDEF_TYPE_OMP_ARRAY_SHAPING: - T = Context.OMPArraySectionTy; + T = Context.OMPArrayShapingTy; break; case PREDEF_TYPE_OMP_ITERATOR: T = Context.OMPIteratorTy; diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index baded0fe19831f..7d3930022a69c0 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -956,14 +956,22 @@ void ASTStmtReader::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { E->setRBracketLoc(readSourceLocation()); } -void ASTStmtReader::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) { +void ASTStmtReader::VisitArraySectionExpr(ArraySectionExpr *E) { VisitExpr(E); + E->ASType = Record.readEnum(); + E->setBase(Record.readSubExpr()); 
E->setLowerBound(Record.readSubExpr()); E->setLength(Record.readSubExpr()); - E->setStride(Record.readSubExpr()); + + if (E->isOMPArraySection()) + E->setStride(Record.readSubExpr()); + E->setColonLocFirst(readSourceLocation()); - E->setColonLocSecond(readSourceLocation()); + + if (E->isOMPArraySection()) + E->setColonLocSecond(readSourceLocation()); + E->setRBracketLoc(readSourceLocation()); } @@ -3090,8 +3098,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = new (Context) MatrixSubscriptExpr(Empty); break; - case EXPR_OMP_ARRAY_SECTION: - S = new (Context) OMPArraySectionExpr(Empty); + case EXPR_ARRAY_SECTION: + S = new (Context) ArraySectionExpr(Empty); break; case EXPR_OMP_ARRAY_SHAPING: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index cd5f733baf76f4..39aec31b6d8790 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -880,16 +880,21 @@ void ASTStmtWriter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { Code = serialization::EXPR_ARRAY_SUBSCRIPT; } -void ASTStmtWriter::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) { +void ASTStmtWriter::VisitArraySectionExpr(ArraySectionExpr *E) { VisitExpr(E); + Record.writeEnum(E->ASType); Record.AddStmt(E->getBase()); Record.AddStmt(E->getLowerBound()); Record.AddStmt(E->getLength()); - Record.AddStmt(E->getStride()); + if (E->isOMPArraySection()) + Record.AddStmt(E->getStride()); Record.AddSourceLocation(E->getColonLocFirst()); - Record.AddSourceLocation(E->getColonLocSecond()); + + if (E->isOMPArraySection()) + Record.AddSourceLocation(E->getColonLocSecond()); + Record.AddSourceLocation(E->getRBracketLoc()); - Code = serialization::EXPR_OMP_ARRAY_SECTION; + Code = serialization::EXPR_ARRAY_SECTION; } void ASTStmtWriter::VisitOMPArrayShapingExpr(OMPArrayShapingExpr *E) { diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp 
index a678c3827e7f12..1cebfbbee77dae 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -188,9 +188,9 @@ void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, os << DerefStr1; break; } - case Stmt::OMPArraySectionExprClass: { + case Stmt::ArraySectionExprClass: { os << "Array access"; - const OMPArraySectionExpr *AE = cast(S); + const ArraySectionExpr *AE = cast(S); AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(), State.get(), N->getLocationContext()); os << DerefStr1; diff --git a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp index 1cf81b54e77d32..7ac34ef8164e4c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp @@ -350,7 +350,7 @@ static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, return false; case Stmt::CallExprClass: case Stmt::ArraySubscriptExprClass: - case Stmt::OMPArraySectionExprClass: + case Stmt::ArraySectionExprClass: case Stmt::OMPArrayShapingExprClass: case Stmt::OMPIteratorExprClass: case Stmt::ImplicitCastExprClass: diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 09c69f9612d96b..0b1edf3e5c96bf 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1948,7 +1948,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::CXXPseudoDestructorExprClass: case Stmt::SubstNonTypeTemplateParmExprClass: case Stmt::CXXNullPtrLiteralExprClass: - case Stmt::OMPArraySectionExprClass: + case Stmt::ArraySectionExprClass: case Stmt::OMPArrayShapingExprClass: case Stmt::OMPIteratorExprClass: case Stmt::SYCLUniqueStableNameExprClass: diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp 
b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index 32850f5eea92a9..0c047b6c5da2f8 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -439,6 +439,9 @@ class DependencyScanningAction : public tooling::ToolAction { if (Result) setLastCC1Arguments(std::move(OriginalInvocation)); + // Propagate the statistics to the parent FileManager. + DriverFileMgr->AddStats(ScanInstance.getFileManager()); + return Result; } diff --git a/clang/test/CodeGen/X86/ms-x86-intrinsics.c b/clang/test/CodeGen/X86/ms-x86-intrinsics.c index a1c90d71c8ebf5..aa557c8e19a83d 100644 --- a/clang/test/CodeGen/X86/ms-x86-intrinsics.c +++ b/clang/test/CodeGen/X86/ms-x86-intrinsics.c @@ -48,7 +48,7 @@ long long test__readfsqword(unsigned long Offset) { __int64 test__emul(int a, int b) { return __emul(a, b); } -// CHECK-LABEL: define dso_local i64 @test__emul(i32 noundef %a, i32 noundef %b) +// CHECK-LABEL: define dso_local range(i64 -4611686016279904256, 4611686018427387905) i64 @test__emul(i32 noundef %a, i32 noundef %b) // CHECK: [[X:%[0-9]+]] = sext i32 %a to i64 // CHECK: [[Y:%[0-9]+]] = sext i32 %b to i64 // CHECK: [[RES:%[0-9]+]] = mul nsw i64 [[Y]], [[X]] @@ -57,7 +57,7 @@ __int64 test__emul(int a, int b) { unsigned __int64 test__emulu(unsigned int a, unsigned int b) { return __emulu(a, b); } -// CHECK-LABEL: define dso_local i64 @test__emulu(i32 noundef %a, i32 noundef %b) +// CHECK-LABEL: define dso_local range(i64 0, -8589934590) i64 @test__emulu(i32 noundef %a, i32 noundef %b) // CHECK: [[X:%[0-9]+]] = zext i32 %a to i64 // CHECK: [[Y:%[0-9]+]] = zext i32 %b to i64 // CHECK: [[RES:%[0-9]+]] = mul nuw i64 [[Y]], [[X]] @@ -108,13 +108,13 @@ long long test__readgsqword(unsigned long Offset) { __int64 test__mulh(__int64 a, __int64 b) { return __mulh(a, b); } -// CHECK-X64-LABEL: define dso_local i64 @test__mulh(i64 noundef %a, i64 noundef %b) +// CHECK-X64-LABEL: define 
dso_local range(i64 -4611686018427387904, 4611686018427387905) i64 @test__mulh(i64 noundef %a, i64 noundef %b) // CHECK-X64: = mul nsw i128 % unsigned __int64 test__umulh(unsigned __int64 a, unsigned __int64 b) { return __umulh(a, b); } -// CHECK-X64-LABEL: define dso_local i64 @test__umulh(i64 noundef %a, i64 noundef %b) +// CHECK-X64-LABEL: define dso_local range(i64 0, -1) i64 @test__umulh(i64 noundef %a, i64 noundef %b) // CHECK-X64: = mul nuw i128 % __int64 test_mul128(__int64 Multiplier, diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index 1fb39f9a346667..de30a00138ac80 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -66,7 +66,7 @@ struct anon_struct { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10:[0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10:[0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 @@ -114,7 +114,7 @@ void test1(struct annotated *p, int index, int val) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], [[INDEX]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 @@ -158,7 +158,7 @@ void test2(struct annotated *p, size_t index) { p->array[index] = __builtin_dynamic_object_size(p->array, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test2_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -169,7 +169,7 @@ void test2(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP3]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test2_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -203,7 +203,7 @@ size_t test2_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], [[INDEX]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: 
tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 @@ -257,7 +257,7 @@ void test3(struct annotated *p, size_t index) { p->array[index] = __builtin_dynamic_object_size(p, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -270,7 +270,7 @@ void test3(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 0 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP5]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -308,7 +308,7 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont4: // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], 2 @@ -325,7 +325,7 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = icmp ult i64 [[IDXPROM13]], [[TMP6]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP7]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds16: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM13]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM13]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont20: // SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD7]], 3 @@ -342,7 +342,7 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP13:%.*]] = icmp ult i64 [[IDXPROM30]], [[TMP12]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP13]], label [[CONT37:%.*]], label [[HANDLER_OUT_OF_BOUNDS33:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds33: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM30]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM30]]) #[[ATTR10]], 
!nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont37: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM30]] @@ -441,7 +441,7 @@ void test4(struct annotated *p, int index, int fam_idx) { p->array[index + 2] = (unsigned char)__builtin_dynamic_object_size(&(p->array[fam_idx]), 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test4_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -456,7 +456,7 @@ void test4(struct annotated *p, int index, int fam_idx) { // SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP3]], i64 0 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP7]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test4_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -494,7 +494,7 @@ size_t test4_bdos(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) 
#[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 @@ -545,7 +545,7 @@ void test5(struct anon_struct *p, int index) { p->array[index] = __builtin_dynamic_object_size(p, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 16, 1) i64 @test5_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -556,7 +556,7 @@ void test5(struct anon_struct *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = select i1 [[DOTINV]], i64 0, i64 [[TMP1]] // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP2]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 16, 1) i64 @test5_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -590,7 +590,7 @@ size_t test5_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], 
!nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 @@ -683,7 +683,7 @@ size_t test6_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont7: // SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9 @@ -756,7 +756,7 @@ size_t test7_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont9: 
// SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9 @@ -797,7 +797,7 @@ void test8(struct union_of_fams *p, int index) { p->ints[index] = __builtin_dynamic_object_size(p->ints, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test8_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -805,7 +805,7 @@ void test8(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i8 [[DOT_COUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP0]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test8_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -955,7 +955,7 @@ void test10(struct union_of_fams *p, int index) { p->bytes[index] = (unsigned char)__builtin_dynamic_object_size(p->bytes, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test10_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -2147483648, 2147483648) i64 @test10_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -964,7 +964,7 @@ void test10(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NARROW]] to i64 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP0]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 
@test10_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -2147483648, 2147483648) i64 @test10_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -1095,10 +1095,10 @@ int test12_a, test12_b; // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0 // SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds4: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 0) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 0) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.type_mismatch6: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT_ANON_5:%.*]], ptr @test12_foo, i64 1, i32 0, i32 0, i32 0) to i64)) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT_ANON_5:%.*]], ptr @test12_foo, i64 1, i32 0, i32 0, i32 0) to i64)) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12( @@ -1188,7 +1188,7 @@ struct test13_bar { // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], [[INDEX]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label 
[[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB23:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont5: // SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 @@ -1249,7 +1249,7 @@ struct test14_foo { // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: trap: // SANITIZE-WITH-ATTR-NEXT: tail call void @llvm.trap() #[[ATTR10]] @@ -1305,7 +1305,7 @@ int test14(int idx) { // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull 
@[[GLOB27:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: trap: // SANITIZE-WITH-ATTR-NEXT: tail call void @llvm.trap() #[[ATTR10]] @@ -1326,7 +1326,7 @@ int test14(int idx) { // SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: trap: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @llvm.trap() #[[ATTR8]] @@ -1487,7 +1487,7 @@ struct tests_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB26:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont4: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84 @@ -1528,7 +1528,7 @@ int test24(int c, struct tests_foo *var) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB27:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont5: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44 @@ -1580,7 +1580,7 @@ struct test26_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont5: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 8 @@ -1651,7 +1651,7 @@ struct test27_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], 
!nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 24 @@ -1717,7 +1717,7 @@ struct test28_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont17: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 12 @@ -1779,7 +1779,7 @@ struct annotated_struct_array { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 [[TMP1]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB36:[0-9]+]], i64 [[TMP1]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]] @@ -1791,7 +1791,7 @@ struct annotated_struct_array { // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM15]], [[TMP3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds16: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont20: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 12 @@ -1826,7 +1826,7 @@ struct annotated_struct_array { // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT21:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: -// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: cont21: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]] diff --git a/clang/test/CodeGen/ms-mixed-ptr-sizes.c b/clang/test/CodeGen/ms-mixed-ptr-sizes.c index 89d05fd30b72c2..51bea60eb39dce 100644 --- a/clang/test/CodeGen/ms-mixed-ptr-sizes.c +++ 
b/clang/test/CodeGen/ms-mixed-ptr-sizes.c @@ -49,7 +49,7 @@ void test_other(struct Foo *f, __attribute__((address_space(10))) int *i) { } int test_compare1(int *__ptr32 __uptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare1 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare1 // X64: %{{.+}} = addrspacecast ptr %j to ptr addrspace(271) // X64: %cmp = icmp eq ptr addrspace(271) %{{.+}}, %i // X86: %{{.+}} = addrspacecast ptr addrspace(272) %j to ptr addrspace(271) @@ -58,7 +58,7 @@ int test_compare1(int *__ptr32 __uptr i, int *__ptr64 j) { } int test_compare2(int *__ptr32 __sptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare2 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare2 // X64: %{{.+}} = addrspacecast ptr %j to ptr addrspace(270) // X64: %cmp = icmp eq ptr addrspace(270) %{{.+}}, %i // X86: %{{.+}} = addrspacecast ptr addrspace(272) %j to ptr @@ -67,7 +67,7 @@ int test_compare2(int *__ptr32 __sptr i, int *__ptr64 j) { } int test_compare3(int *__ptr32 __uptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare3 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare3 // X64: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr // X64: %cmp = icmp eq ptr %{{.+}}, %j // X86: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr addrspace(272) @@ -76,7 +76,7 @@ int test_compare3(int *__ptr32 __uptr i, int *__ptr64 j) { } int test_compare4(int *__ptr32 __sptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare4 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare4 // X64: %{{.+}} = addrspacecast ptr addrspace(270) %i to ptr // X64: %cmp = icmp eq ptr %{{.+}}, %j // X86: %{{.+}} = addrspacecast ptr %i to ptr addrspace(272) diff --git a/clang/test/CodeGenObjCXX/msabi-stret-arm64.mm b/clang/test/CodeGenObjCXX/msabi-stret-arm64.mm new file mode 100644 index 00000000000000..3bbdbebc5cb576 --- 
/dev/null +++ b/clang/test/CodeGenObjCXX/msabi-stret-arm64.mm @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple aarch64-pc-windows-msvc -fobjc-runtime=gnustep-2.2 -fobjc-dispatch-method=non-legacy -emit-llvm -o - %s | FileCheck %s + +// Pass and return for type size <= 8 bytes. +struct S1 { + int a[2]; +}; + +// Pass and return hfa <= 8 bytes +struct F1 { + float a[2]; +}; + +// Pass and return for type size > 16 bytes. +struct S2 { + int a[5]; +}; + +// Pass and return aggregate (of size < 16 bytes) with non-trivial destructor. +// Sret and inreg: Returned in x0 +struct S3 { + int a[3]; + ~S3(); +}; +S3::~S3() { +} + + +@interface MsgTest { id isa; } @end +@implementation MsgTest +- (S1) smallS1 { + S1 x; + x.a[0] = 0; + x.a[1] = 1; + return x; + +} +- (F1) smallF1 { + F1 x; + x.a[0] = 0.2f; + x.a[1] = 0.5f; + return x; +} +- (S2) stretS2 { + S2 x; + for (int i = 0; i < 5; i++) { + x.a[i] = i; + } + return x; +} +- (S3) stretInRegS3 { + S3 x; + for (int i = 0; i < 3; i++) { + x.a[i] = i; + } + return x; +} ++ (S3) msgTestStretInRegS3 { + S3 x; + for (int i = 0; i < 3; i++) { + x.a[i] = i; + } + return x; +} +@end + +void test0(MsgTest *t) { + // CHECK: call {{.*}} @objc_msgSend + S1 ret = [t smallS1]; + // CHECK: call {{.*}} @objc_msgSend + F1 ret2 = [t smallF1]; + // CHECK: call {{.*}} @objc_msgSend_stret + S2 ret3 = [t stretS2]; + // CHECK: call {{.*}} @objc_msgSend_stret2 + S3 ret4 = [t stretInRegS3]; + // CHECK: call {{.*}} @objc_msgSend_stret2 + S3 ret5 = [MsgTest msgTestStretInRegS3]; +} diff --git a/clang/test/Driver/default-denormal-fp-math.c b/clang/test/Driver/default-denormal-fp-math.c index 5f87e151df49e4..c04ad5c08b8d0d 100644 --- a/clang/test/Driver/default-denormal-fp-math.c +++ b/clang/test/Driver/default-denormal-fp-math.c @@ -3,15 +3,6 @@ // RUN: %clang -### -target x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s -// crtfastmath enables ftz and daz -// RUN: %clang -### -target 
x86_64-unknown-linux-gnu -ffast-math --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s - -// crt not linked in with nostartfiles -// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math -nostartfiles --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s - -// If there's no crtfastmath, don't assume ftz/daz -// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math --sysroot=/dev/null -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s - // RUN: %clang -### -target x86_64-scei-ps4 -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s // Flag omitted for default diff --git a/clang/test/Driver/integrated-as.c b/clang/test/Driver/integrated-as.c index e78fde873cf47f..b0a26f6011b0c7 100644 --- a/clang/test/Driver/integrated-as.c +++ b/clang/test/Driver/integrated-as.c @@ -3,7 +3,7 @@ // RUN: %clang -### -c -save-temps -integrated-as --target=x86_64 %s 2>&1 | FileCheck %s // CHECK: cc1as -// CHECK: -mrelax-all +// CHECK-NOT: -mrelax-all // RISC-V does not enable -mrelax-all // RUN: %clang -### -c -save-temps -integrated-as --target=riscv64 %s 2>&1 | FileCheck %s -check-prefix=RISCV-RELAX diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index d918f4f2d7dbd9..958e682b6c3c11 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -1446,6 +1446,32 @@ // RUN: %clang --target=i386-unknown-linux -no-pie -### %s -ffast-math \ // RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// Don't link crtfastmath.o with -shared +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -shared \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -Ofast -shared \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck 
--check-prefix=CHECK-NOCRTFASTMATH %s +// Check for effects of -mdaz-ftz +// RUN: %clang --target=x86_64-unknown-linux -### %s -ffast-math -shared -mdaz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -mdaz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -mdaz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -### %s -ffast-math -shared -mno-daz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -mno-daz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -mno-daz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s // CHECK-CRTFASTMATH: usr/lib/gcc/x86_64-unknown-linux/10.2.0{{/|\\\\}}crtfastmath.o // CHECK-NOCRTFASTMATH-NOT: crtfastmath.o diff --git a/clang/test/Frontend/ast-dump-on-llvm.ll b/clang/test/Frontend/ast-dump-on-llvm.ll new file mode 100644 index 00000000000000..cdacfde4ba848c --- /dev/null +++ b/clang/test/Frontend/ast-dump-on-llvm.ll @@ -0,0 +1,29 @@ +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump=json %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-EQ-JSON +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump=default %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-EQ-DEFAULT +; 
RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-all %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-ALL +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-all=json %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-ALL-EQ-JSON +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-all=default %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-ALL-EQ-DEFAULT + +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-print %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-PRINT +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-view %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-VIEW +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-list %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-LIST +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-lookups %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-LOOKUP +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-filter=FunctionDecl %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-FILTER-EQ +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-decl-types %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-DECL-TYPES +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SYNTAX-ONLY + + +; CHECK-AST-DUMP: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-EQ-JSON: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-EQ-DEFAULT: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-ALL: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-ALL-EQ-JSON: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-ALL-EQ-DEFAULT: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-PRINT: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-VIEW: 
fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-LIST: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-LOOKUP: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-FILTER-EQ: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-DECL-TYPES: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-SYNTAX-ONLY: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' diff --git a/clang/test/OpenMP/task_depend_messages.cpp b/clang/test/OpenMP/task_depend_messages.cpp index 388595bef4de1b..3f39c55527b5d4 100644 --- a/clang/test/OpenMP/task_depend_messages.cpp +++ b/clang/test/OpenMP/task_depend_messages.cpp @@ -62,7 +62,7 @@ int main(int argc, char **argv, char *env[]) { #pragma omp task depend(in : argv[ : argc][1 : argc - 1]) #pragma omp task depend(in : arr[0]) #pragma omp task depend(depobj:argc) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp50-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} omp51-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} - #pragma omp task depend(depobj : argv[ : argc][1 : argc - 1]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp50-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} omp51-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} + #pragma omp task depend(depobj : argv[ : argc][1 : argc - 1]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp50-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} omp51-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} #pragma omp task depend(depobj : arr[0]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} #pragma omp task depend(in : ([ // expected-error {{expected 
variable name or 'this' in lambda capture list}} expected-error {{expected ')'}} expected-note {{to match this '('}} #pragma omp task depend(in : ([] // expected-error {{expected body of lambda expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} diff --git a/clang/test/ParserOpenACC/parse-cache-construct.cpp b/clang/test/ParserOpenACC/parse-cache-construct.cpp index f0a35824696d8c..1ab2153a68be8d 100644 --- a/clang/test/ParserOpenACC/parse-cache-construct.cpp +++ b/clang/test/ParserOpenACC/parse-cache-construct.cpp @@ -74,12 +74,12 @@ void use() { for (int i = 0; i < 10; ++i) { // FIXME: Once we have a new array-section type to represent OpenACC as // well, change this error message. - // expected-error@+2{{OpenMP array section is not allowed here}} + // expected-error@+2{{OpenACC sub-array is not allowed here}} // expected-warning@+1{{OpenACC construct 'cache' not yet implemented, pragma ignored}} #pragma acc cache(Arrs.MemArr[3:4].array[1:4]) } for (int i = 0; i < 10; ++i) { - // expected-error@+2{{OpenMP array section is not allowed here}} + // expected-error@+2{{OpenACC sub-array is not allowed here}} // expected-warning@+1{{OpenACC construct 'cache' not yet implemented, pragma ignored}} #pragma acc cache(Arrs.MemArr[3:4].array[4]) } diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 799f22b8c120e5..ee2cb2d1501dea 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -482,13 +482,13 @@ void VarListClauses() { #pragma acc serial copy(HasMem.MemArr[3].array[1:4]), seq for(;;){} - // expected-error@+3{{OpenMP array section is not allowed here}} + // expected-error@+3{{OpenACC sub-array is not allowed here}} // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[1:3].array[1]), seq 
for(;;){} - // expected-error@+3{{OpenMP array section is not allowed here}} + // expected-error@+3{{OpenACC sub-array is not allowed here}} // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[1:3].array[1:2]), seq diff --git a/clang/test/Preprocessor/predefined-macros-hlsl.hlsl b/clang/test/Preprocessor/predefined-macros-hlsl.hlsl index 251362cd03c0f8..cc5233fbcb2aca 100644 --- a/clang/test/Preprocessor/predefined-macros-hlsl.hlsl +++ b/clang/test/Preprocessor/predefined-macros-hlsl.hlsl @@ -1,14 +1,19 @@ -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-amplification | FileCheck -match-full-lines %s --check-prefixes=CHECK,AMPLIFICATION -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-compute | FileCheck -match-full-lines %s --check-prefixes=CHECK,COMPUTE -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-domain | FileCheck -match-full-lines %s --check-prefixes=CHECK,DOMAIN -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-geometry | FileCheck -match-full-lines %s --check-prefixes=CHECK,GEOMETRY -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-hull | FileCheck -match-full-lines %s --check-prefixes=CHECK,HULL -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-library | FileCheck -match-full-lines %s --check-prefixes=CHECK,LIBRARY -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-mesh | FileCheck -match-full-lines %s --check-prefixes=CHECK,MESH -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-pixel | FileCheck -match-full-lines %s --check-prefixes=CHECK,PIXEL -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-vertex | FileCheck -match-full-lines %s --check-prefixes=CHECK,VERTEX +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-amplification | FileCheck 
-match-full-lines %s --check-prefixes=CHECK,AMPLIFICATION,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-compute | FileCheck -match-full-lines %s --check-prefixes=CHECK,COMPUTE,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-domain | FileCheck -match-full-lines %s --check-prefixes=CHECK,DOMAIN,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-geometry | FileCheck -match-full-lines %s --check-prefixes=CHECK,GEOMETRY,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-hull | FileCheck -match-full-lines %s --check-prefixes=CHECK,HULL,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-library | FileCheck -match-full-lines %s --check-prefixes=CHECK,LIBRARY,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-mesh | FileCheck -match-full-lines %s --check-prefixes=CHECK,MESH,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-pixel | FileCheck -match-full-lines %s --check-prefixes=CHECK,PIXEL,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-vertex | FileCheck -match-full-lines %s --check-prefixes=CHECK,VERTEX,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.3-vertex -fnative-half-type | FileCheck -match-full-lines %s --check-prefixes=CHECK,VERTEX,HALF + +// HALF: #define __HLSL_ENABLE_16_BIT 1 +// NOHALF-NOT: __HLSL_ENABLE_16_BIT // CHECK: #define __HLSL_VERSION 2021 + // CHECK: #define __SHADER_STAGE_AMPLIFICATION 14 // CHECK: #define __SHADER_STAGE_COMPUTE 5 // CHECK: #define __SHADER_STAGE_DOMAIN 4 diff --git a/clang/test/TestRunner.sh b/clang/test/TestRunner.sh deleted file mode 100755 index f96d3d552d2ee6..00000000000000 --- a/clang/test/TestRunner.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh -# -# TestRunner.sh - Backward compatible utility for testing an individual file. - -# Find where this script is. 
-Dir=$(dirname $(which $0)) -AbsDir=$(cd $Dir; pwd) - -# Find 'lit', assuming standard layout. -lit=$AbsDir/../../../utils/lit/lit.py - -# Dispatch to lit. -$lit "$@" diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 191e944ae91e03..ae5b697b8eb9e0 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -594,9 +594,7 @@ getInterfaceFile(const StringRef Filename) { std::unique_ptr IF; switch (identify_magic(Buffer->getBuffer())) { case file_magic::macho_dynamically_linked_shared_lib: - LLVM_FALLTHROUGH; case file_magic::macho_dynamically_linked_shared_lib_stub: - LLVM_FALLTHROUGH; case file_magic::macho_universal_binary: return DylibReader::get(Buffer->getMemBufferRef()); break; diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 74163f30e19b1d..398a11a5703558 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -5713,8 +5713,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("UnaryOperator"); case CXCursor_ArraySubscriptExpr: return cxstring::createRef("ArraySubscriptExpr"); - case CXCursor_OMPArraySectionExpr: - return cxstring::createRef("OMPArraySectionExpr"); + case CXCursor_ArraySectionExpr: + return cxstring::createRef("ArraySectionExpr"); case CXCursor_OMPArrayShapingExpr: return cxstring::createRef("OMPArrayShapingExpr"); case CXCursor_OMPIteratorExpr: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 454bf754986189..9325a16d2a8486 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -423,8 +423,8 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, K = CXCursor_UnexposedExpr; break; - case Stmt::OMPArraySectionExprClass: - K = CXCursor_OMPArraySectionExpr; + case Stmt::ArraySectionExprClass: + K = CXCursor_ArraySectionExpr; break; case 
Stmt::OMPArrayShapingExprClass: diff --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp index 3e00e82d6df966..49f76bac704d68 100644 --- a/clang/unittests/Format/SortIncludesTest.cpp +++ b/clang/unittests/Format/SortIncludesTest.cpp @@ -6,19 +6,19 @@ // //===----------------------------------------------------------------------===// -#include "FormatTestUtils.h" +#include "FormatTestBase.h" #include "clang/Format/Format.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "gtest/gtest.h" -#define DEBUG_TYPE "format-test" +#define DEBUG_TYPE "sort-includes-test" namespace clang { namespace format { namespace { -class SortIncludesTest : public ::testing::Test { +class SortIncludesTest : public test::FormatTestBase { protected: std::vector GetCodeRange(StringRef Code) { return std::vector(1, tooling::Range(0, Code.size())); @@ -819,6 +819,122 @@ TEST_F(SortIncludesTest, CalculatesCorrectCursorPositionWithRegrouping) { EXPECT_EQ(27u, newCursor(Code, 28)); // Start of last line } +TEST_F(SortIncludesTest, + CalculatesCorrectCursorPositionWhenNoReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = { + {"^\"a\"", 0, 0, false}, {"^\"b\"", 1, 1, false}, {".*", 2, 2, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + verifyNoChange(Code); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ(14u, newCursor(Code, 14)); + EXPECT_EQ(16u, newCursor(Code, 16)); + EXPECT_EQ(30u, newCursor(Code, 30)); + EXPECT_EQ(32u, newCursor(Code, 32)); + EXPECT_EQ(46u, newCursor(Code, 46)); + EXPECT_EQ(48u, newCursor(Code, 48)); +} + +TEST_F( + SortIncludesTest, + 
CalculatesCorrectCursorPositionWhenRemoveLinesReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = {{".*", 0, 0, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + std::string Expected = "#include \"a\"\r\n" // Start of line: 0 + "#include \"b\"\r\n" // Start of line: 14 + "#include \"c\"\r\n" // Start of line: 28 + "\r\n" // Start of line: 42 + "int i;"; // Start of line: 44 + EXPECT_EQ(Expected, sort(Code)); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ( + 14u, + newCursor(Code, 14)); // cursor on empty line in include block is ignored + EXPECT_EQ(14u, newCursor(Code, 16)); + EXPECT_EQ( + 30u, + newCursor(Code, 30)); // cursor on empty line in include block is ignored + EXPECT_EQ(28u, newCursor(Code, 32)); + EXPECT_EQ(42u, newCursor(Code, 46)); + EXPECT_EQ(44u, newCursor(Code, 48)); +} + +// FIXME: the tests below should pass. 
+#if 0 +TEST_F( + SortIncludesTest, + CalculatesCorrectCursorPositionWhenNewLineReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = { + {"^\"a\"", 0, 0, false}, {"^\"b\"", 1, 1, false}, {".*", 2, 2, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "#include \"b\"\r\n" // Start of line: 14 + "#include \"c\"\r\n" // Start of line: 28 + "\r\n" // Start of line: 42 + "int i;"; // Start of line: 44 + std::string Expected = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + EXPECT_EQ(Expected, sort(Code)); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ(15u, newCursor(Code, 16)); + EXPECT_EQ(30u, newCursor(Code, 32)); + EXPECT_EQ(44u, newCursor(Code, 46)); + EXPECT_EQ(46u, newCursor(Code, 48)); +} + +TEST_F( + SortIncludesTest, + CalculatesCorrectCursorPositionWhenNoNewLineReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = { + {"^\"a\"", 0, 0, false}, {"^\"b\"", 1, 1, false}, {".*", 2, 2, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"c\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"b\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + std::string Expected = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + EXPECT_EQ(Expected, sort(Code)); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ(14u, newCursor(Code, 14)); 
+ EXPECT_EQ(30u, newCursor(Code, 32)); + EXPECT_EQ(30u, newCursor(Code, 30)); + EXPECT_EQ(15u, newCursor(Code, 15)); + EXPECT_EQ(44u, newCursor(Code, 46)); + EXPECT_EQ(46u, newCursor(Code, 48)); +} +#endif + TEST_F(SortIncludesTest, DeduplicateIncludes) { EXPECT_EQ("#include \n" "#include \n" diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp index b2a1069a9a61cc..31d91ef3c73928 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp @@ -28,7 +28,7 @@ void MarkupStackTracePrinter::RenderData(InternalScopedString *buffer, const char *format, const DataInfo *DI, const char *strip_path_prefix) { RenderContext(buffer); - buffer->AppendF(kFormatData, DI->start); + buffer->AppendF(kFormatData, reinterpret_cast(DI->start)); } bool MarkupStackTracePrinter::RenderNeedsSymbolization(const char *format) { @@ -43,12 +43,13 @@ void MarkupStackTracePrinter::RenderFrame(InternalScopedString *buffer, const char *strip_path_prefix) { CHECK(!RenderNeedsSymbolization(format)); RenderContext(buffer); - buffer->AppendF(kFormatFrame, frame_no, address); + buffer->AppendF(kFormatFrame, frame_no, reinterpret_cast(address)); } bool MarkupSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *stack) { char buffer[kFormatFunctionMax]; - internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr); + internal_snprintf(buffer, sizeof(buffer), kFormatFunction, + reinterpret_cast(addr)); stack->info.function = internal_strdup(buffer); return true; } @@ -118,7 +119,8 @@ static void RenderMmaps(InternalScopedString *buffer, // module.base_address == dlpi_addr // range.beg == dlpi_addr + p_vaddr // relative address == p_vaddr == range.beg - module.base_address - buffer->AppendF(kFormatMmap, range.beg, range.end - range.beg, moduleId, + buffer->AppendF(kFormatMmap, reinterpret_cast(range.beg), + range.end - 
range.beg, static_cast(moduleId), accessBuffer.data(), range.beg - module.base_address()); buffer->Append("\n"); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h index 83643504e1289e..a43661eaecf2ff 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h @@ -33,13 +33,13 @@ constexpr uptr kFormatFunctionMax = 64; // More than big enough for 64-bit hex. constexpr const char *kFormatData = "{{{data:%p}}}"; // One frame in a backtrace (printed on a line by itself). -constexpr const char *kFormatFrame = "{{{bt:%u:%p}}}"; +constexpr const char *kFormatFrame = "{{{bt:%d:%p}}}"; // Module contextual element. -constexpr const char *kFormatModule = "{{{module:%d:%s:elf:%s}}}"; +constexpr const char *kFormatModule = "{{{module:%zu:%s:elf:%s}}}"; // mmap for a module segment. -constexpr const char *kFormatMmap = "{{{mmap:%p:0x%x:load:%d:%s:0x%x}}}"; +constexpr const char *kFormatMmap = "{{{mmap:%p:0x%zx:load:%d:%s:0x%zx}}}"; // Dump trigger element. #define FORMAT_DUMPFILE "{{{dumpfile:%s:%s}}}" diff --git a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp index 5f3c8b81c07b31..fc793abf44cda5 100644 --- a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp @@ -108,9 +108,9 @@ bool MemMapFuchsia::mapImpl(UNUSED uptr Addr, uptr Size, const char *Name, // Create the VMO. 
zx_status_t Status = _zx_vmo_create(Size, 0, &Vmo); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmo_create", Size); - return false; + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmo_create", Size); } if (Name != nullptr) @@ -123,15 +123,15 @@ bool MemMapFuchsia::mapImpl(UNUSED uptr Addr, uptr Size, const char *Name, Status = _zx_vmar_map(_zx_vmar_root_self(), MapFlags, 0, Vmo, 0, Size, &MapAddr); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmar_map", Size); - - Status = _zx_handle_close(Vmo); - CHECK_EQ(Status, ZX_OK); + if (AllowNoMem && IsNoMemError(Status)) { + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); - MapAddr = 0; - Vmo = ZX_HANDLE_INVALID; - return false; + MapAddr = 0; + Vmo = ZX_HANDLE_INVALID; + return false; + } + dieOnError(Status, "zx_vmar_map", Size); } if (PreCommit) { @@ -194,9 +194,9 @@ bool MemMapFuchsia::remapImpl(uptr Addr, uptr Size, const char *Name, _zx_vmar_map(_zx_vmar_root_self(), MapFlags, Addr - getRootVmarBase(), Vmo, Addr - MapAddr, Size, &MappedAddr); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmar_map", Size); - return false; + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmar_map", Size); } DCHECK_EQ(Addr, MappedAddr); @@ -234,9 +234,9 @@ bool ReservedMemoryFuchsia::createImpl(UNUSED uptr Addr, uptr Size, zx_status_t Status = _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_ALLOW_FAULTS, 0, getPlaceholderVmo(), 0, Size, &Base); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmar_map", Size); - return false; + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmar_map", Size); } Capacity = Size; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h 
b/compiler-rt/lib/scudo/standalone/primary64.h index 61d57976ae43b6..d6119051b1622f 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -884,9 +884,10 @@ template class SizeClassAllocator64 { ScopedLock ML(Region->MMLock); const bool RegionIsExhausted = Region->Exhausted; - if (!RegionIsExhausted) + if (!RegionIsExhausted) { PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, MaxBlockCount); + } ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; { @@ -1019,7 +1020,6 @@ template class SizeClassAllocator64 { MAP_ALLOWNOMEM))) { Printf("Can't reserve pages for size class %zu.\n", getSizeByClassId(ClassId)); - Region->Exhausted = true; return 0U; } initRegion(Region, ClassId, diff --git a/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in b/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in index 32a8c48e9c1c79..3fa9a7a2780e24 100644 --- a/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in +++ b/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in @@ -23,5 +23,6 @@ config.test_source_root = config.test_exec_root # host triple as the trailing path component. The value is incorrect for i386 # tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as # target, and we don't support different environments for building and, -# respectivelly, running tests, we shouldn't see this case. -assert not config.enable_per_target_runtime_dir or config.target_arch == config.host_arch +# respectively, running tests, we only need to fix up the x86_64 case.
+if config.enable_per_target_runtime_dir and config.target_arch != config.host_arch: + config.compiler_rt_libdir = re.sub(r'/i386(?=-[^/]+$)', '/x86_64', config.compiler_rt_libdir) diff --git a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in index c968e403a44d09..1e2442a1487a43 100644 --- a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in +++ b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in @@ -21,10 +21,11 @@ config.test_source_root = config.test_exec_root # When LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=on, the initial value of # config.compiler_rt_libdir (COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR) has the # host triple as the trailing path component. The value is incorrect for i386 -# tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as +# tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as # target, and we don't support different environments for building and, -# respectivelly, running tests, we shouldn't see this case. -assert not config.enable_per_target_runtime_dir or config.target_arch == config.host_arch +# respectively, running tests, we only need to fix up the x86_64 case.
+if config.enable_per_target_runtime_dir and config.target_arch != config.host_arch: + config.compiler_rt_libdir = re.sub(r'/i386(?=-[^/]+$)', '/x86_64', config.compiler_rt_libdir) if not config.parallelism_group: - config.parallelism_group = 'shadow-memory' \ No newline at end of file + config.parallelism_group = 'shadow-memory' diff --git a/flang/include/flang/Common/visit.h b/flang/include/flang/Common/visit.h index 4d0897301e01db..d867338be7e0f5 100644 --- a/flang/include/flang/Common/visit.h +++ b/flang/include/flang/Common/visit.h @@ -40,11 +40,17 @@ inline RT_API_ATTRS RESULT Log2VisitHelper( return visitor(std::get<(LOW + N)>(std::forward(u))...); \ } VISIT_CASE_N(1) + [[fallthrough]]; VISIT_CASE_N(2) + [[fallthrough]]; VISIT_CASE_N(3) + [[fallthrough]]; VISIT_CASE_N(4) + [[fallthrough]]; VISIT_CASE_N(5) + [[fallthrough]]; VISIT_CASE_N(6) + [[fallthrough]]; VISIT_CASE_N(7) #undef VISIT_CASE_N } @@ -82,7 +88,7 @@ inline RT_API_ATTRS auto visit(VISITOR &&visitor, VARIANT &&...u) // Some versions of clang have bugs that cause compilation to hang // on these templates. MSVC and older GCC versions may work but are // not well tested. So enable only for GCC 9 and better. 
-#if __GNUC__ < 9 +#if __GNUC__ < 9 && !defined(__clang__) #define FLANG_USE_STD_VISIT #endif diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 87a714d17015cb..531616e7926aca 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -861,7 +861,6 @@ getOutputStream(CompilerInstance &ci, llvm::StringRef inFile, return ci.createDefaultOutputFile( /*Binary=*/false, inFile, /*extension=*/"ll"); case BackendActionTy::Backend_EmitFIR: - LLVM_FALLTHROUGH; case BackendActionTy::Backend_EmitHLFIR: return ci.createDefaultOutputFile( /*Binary=*/false, inFile, /*extension=*/"mlir"); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 8b62fe8c022f80..f66607dfa22f1b 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3794,7 +3794,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto needCleanup = fir::getIntIfConstant(cleanup); if (needCleanup && *needCleanup) temps.push_back(temp); - addSymbol(sym, temp, /*forced=*/true); + addSymbol(sym, + hlfir::translateToExtendedValue(loc, builder, temp).first, + /*forced=*/true); builder.create(loc, addr, temp, transferKindAttr); ++nbDeviceResidentObject; @@ -3810,12 +3812,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::Location loc = getCurrentLocation(); fir::FirOpBuilder &builder = getFirOpBuilder(); + bool isInDeviceContext = + builder.getRegion().getParentOfType(); bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) || Fortran::evaluate::HasCUDAAttrs(assign.rhs); bool hasCUDAImplicitTransfer = Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs); llvm::SmallVector implicitTemps; - if (hasCUDAImplicitTransfer) + if (hasCUDAImplicitTransfer && !isInDeviceContext) implicitTemps = genCUDAImplicitDataTransfer(builder, loc, assign); // Gather some information about the assignment that will impact how it is @@ -3874,13 +3878,13 @@ class 
FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::StatementContext localStmtCtx; hlfir::Entity rhs = evaluateRhs(localStmtCtx); hlfir::Entity lhs = evaluateLhs(localStmtCtx); - if (isCUDATransfer && !hasCUDAImplicitTransfer) + if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext) genCUDADataTransfer(builder, loc, assign, lhs, rhs); else builder.create(loc, rhs, lhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); - if (hasCUDAImplicitTransfer) { + if (hasCUDAImplicitTransfer && !isInDeviceContext) { localSymbols.popScope(); for (mlir::Value temp : implicitTemps) builder.create(loc, temp); diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 4ebd736315bcbc..70483685d20019 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -98,7 +98,7 @@ end ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) -! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %21#0 {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>) -> (!fir.heap>, !fir.heap>) +! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) ! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.heap> ! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> ! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref> @@ -119,3 +119,25 @@ end ! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! 
CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + + +! Check that fir.cuda_data_transfer are not generated within cuf kernel +subroutine sub4() + integer, parameter :: n = 10 + real, device :: adev(n) + real :: ahost(n) + real :: b + integer :: i + + adev = ahost + !$cuf kernel do <<<*,*>>> + do i = 1, n + adev(i) = adev(i) + b + enddo +end subroutine + +! CHECK-LABEL: func.func @_QPsub4() +! CHECK: fir.cuda_data_transfer +! CHECK: fir.cuda_kernel<<<*, *>>> +! CHECK-NOT: fir.cuda_data_transfer +! CHECK: hlfir.assign diff --git a/flang/test/Lower/OpenMP/FIR/if-clause.f90 b/flang/test/Lower/OpenMP/FIR/if-clause.f90 index f686b9708fc54a..683d9f7ef97267 100644 --- a/flang/test/Lower/OpenMP/FIR/if-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/if-clause.f90 @@ -1,7 +1,9 @@ ! This test checks lowering of OpenMP IF clauses. -! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -fopenmp -emit-fir %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0, and +! to the "teams" directive in OpenMP 5.2. +! RUN: bbc -fopenmp -fopenmp-version=52 -emit-fir %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-fir %s -o - | FileCheck %s program main integer :: i diff --git a/flang/test/Lower/OpenMP/FIR/simd.f90 b/flang/test/Lower/OpenMP/FIR/simd.f90 index db7d30295c45d9..91e8750578bfb4 100644 --- a/flang/test/Lower/OpenMP/FIR/simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/simd.f90 @@ -1,6 +1,7 @@ ! Tests for 2.9.3.1 Simd -! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0. +! 
RUN: bbc -fopenmp -fopenmp-version=50 -emit-fir -hlfir=false %s -o - | FileCheck %s !CHECK-LABEL: func @_QPsimd() subroutine simd diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index a7344e02cf7cca..ea4b9240e9e514 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -1,4 +1,5 @@ -!RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s +! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1. +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s !=============================================================================== ! Target_Enter Simple diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index ce4427a0c2cab2..7c15c275d8cc9d 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -1,7 +1,9 @@ ! This test checks lowering of OpenMP IF clauses. -! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0, and +! to the "teams" directive in OpenMP 5.2. +! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck %s program main integer :: i diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 190aa615212176..8ec1a3cefb4a60 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -1,7 +1,8 @@ ! Tests for 2.9.3.1 Simd -!RUN: %flang_fc1 -flang-experimental-hlfir -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -hlfir -emit-hlfir -fopenmp %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0. +! 
RUN: %flang_fc1 -flang-experimental-hlfir -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! RUN: bbc -hlfir -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s !CHECK-LABEL: func @_QPsimd() subroutine simd diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 0f0c736d316250..44f77b5c33607b 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -1,4 +1,5 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s !=============================================================================== ! Target_Enter Simple diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 index d849dd206b9439..90eede4f84108f 100644 --- a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 +++ b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 @@ -1,5 +1,6 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s +! The "use_device_addr" was added to the "target data" directive in OpenMP 5.0. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s ! This tests primary goal is to check the promotion of ! 
non-CPTR arguments from use_device_ptr to diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index 8724f321a9d117..938ab76c6ecbdd 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -50,6 +50,7 @@ Implemented Papers - P1659R3 - ``std::ranges::starts_with`` and ``std::ranges::ends_with`` - P3029R1 - Better ``mdspan``'s CTAD - P2387R3 - Pipe support for user-defined range adaptors +- P2713R1 - Escaping improvements in ``std::format`` Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index f75dd288304b27..01387a404f5d67 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -108,7 +108,7 @@ "`P2164R9 `__","LWG", "``views::enumerate``","February 2023","","","|ranges|" "`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","|In Progress| [#note-P2711R1]_","","|ranges|" "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" -"`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" +"`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","|Complete|","19.0","|format|" "`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","|Complete|","17.0","|format|" "`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","|Complete|","17.0","|format|" "`P2693R1 `__","LWG", "Formatting ``thread::id`` and ``stacktrace``","February 2023","|Partial| [#note-P2693R1]_","","|format|" diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 008f7418ab9c05..666be319757c17 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -32,7 +32,7 @@ "`3951 `__","[expected.object.swap]: Using ``value()`` instead of 
``has_value()``","Kona November 2023","","","" "`3953 `__","``iter_move`` for ``common_iterator`` and ``counted_iterator`` should return ``decltype(auto)``","Kona November 2023","","","|ranges|" "`3957 `__","[container.alloc.reqmts] The value category of v should be claimed","Kona November 2023","","","" -"`3965 `__","Incorrect example in [format.string.escaped] p3 for formatting of combining characters","Kona November 2023","","","|format|" +"`3965 `__","Incorrect example in [format.string.escaped] p3 for formatting of combining characters","Kona November 2023","|Complete|","19.0","|format|" "`3970 `__","[mdspan.syn] Missing definition of ``full_extent_t`` and ``full_extent``","Kona November 2023","","","" "`3973 `__","Monadic operations should be ADL-proof","Kona November 2023","","","" "`3974 `__","``mdspan::operator[]`` should not copy ``OtherIndexTypes``","Kona November 2023","","","" @@ -49,7 +49,7 @@ "`4012 `__","``common_view::begin/end`` are missing the ``simple-view`` check","Tokyo March 2024","","","|ranges|" "`4013 `__","``lazy_split_view::outer-iterator::value_type`` should not provide default constructor","Tokyo March 2024","","","|ranges|" "`4016 `__","container-insertable checks do not match what container-inserter does","Tokyo March 2024","","","" -"`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","","","" +"`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","|Complete|","19.0","" "`4025 `__","Move assignment operator of ``std::expected`` should not be conditionally deleted","Tokyo March 2024","","","" "`4030 `__","Clarify whether arithmetic expressions in ``[numeric.sat.func]`` are mathematical or C++","Tokyo March 2024","|Nothing To Do|","","" "`4031 `__","``bad_expected_access`` member functions should be ``noexcept``","Tokyo March 2024","|Complete|","16.0","" diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv index 
7da77def92daa2..3780c1ed5c1279 100644 --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -10,7 +10,7 @@ Number,Name,Standard,Assignee,Status,First released version "`P2508R1 `__","Exposing ``std::basic-format-string``","C++23","Mark de Wever","|Complete|",15.0 "`P2585R0 `__","Improving default container formatting","C++23","Mark de Wever","|Complete|",17.0 "`P2539R4 `__","Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?","C++23","Mark de Wever","|Complete|","18.0" -"`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","" +"`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","|Complete|",19.0 "`P2675R1 `__","``format``'s width estimation is too approximate and not forward compatible","C++23","Mark de Wever","|Complete|",17.0 "`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|Complete|",17.0 "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``","C++23","Mark de Wever","|In Progress|" diff --git a/libcxx/include/__availability b/libcxx/include/__availability index aa761eb5bfe5e3..7a02ae00846bfa 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -28,30 +28,32 @@ // that previously released library. Normally, this would be a load-time error // when one tries to launch the program against the older library. // -// For example, the filesystem library was introduced in the dylib in macOS 10.15. -// If a user compiles on a macOS 10.15 host but targets macOS 10.13 with their -// program, the compiler would normally not complain (because the required -// declarations are in the headers), but the dynamic loader would fail to find -// the symbols when actually trying to launch the program on macOS 10.13. 
To -// turn this into a compile-time issue instead, declarations are annotated with -// when they were introduced, and the compiler can produce a diagnostic if the -// program references something that isn't available on the deployment target. +// For example, the filesystem library was introduced in the dylib in LLVM 9. +// On Apple platforms, this corresponds to macOS 10.15. If a user compiles on +// a macOS 10.15 host but targets macOS 10.13 with their program, the compiler +// would normally not complain (because the required declarations are in the +// headers), but the dynamic loader would fail to find the symbols when actually +// trying to launch the program on macOS 10.13. To turn this into a compile-time +// issue instead, declarations are annotated with when they were introduced, and +// the compiler can produce a diagnostic if the program references something that +// isn't available on the deployment target. // // This mechanism is general in nature, and any vendor can add their markup to // the library (see below). Whenever a new feature is added that requires support // in the shared library, two macros are added below to allow marking the feature // as unavailable: -// 1. A macro named `_LIBCPP_AVAILABILITY_HAS_NO_` which must be defined -// exactly when compiling for a target that doesn't support the feature. -// 2. A macro named `_LIBCPP_AVAILABILITY_`, which must always be defined -// and must expand to the proper availability attribute for the platform. +// 1. A macro named `_LIBCPP_AVAILABILITY_HAS_` which must be defined +// to `_LIBCPP_INTRODUCED_IN_` for the appropriate LLVM version. +// 2. A macro named `_LIBCPP_AVAILABILITY_`, which must be defined to +// `_LIBCPP_INTRODUCED_IN__MARKUP` for the appropriate LLVM version. // // When vendors decide to ship the feature as part of their shared library, they -// can update these macros appropriately for their platform, and the library will -// use those to provide an optimal user experience. 
+// can update the `_LIBCPP_INTRODUCED_IN_` macro (and the markup counterpart) +// based on the platform version they shipped that version of LLVM in. The library +// will then use this markup to provide an optimal user experience on these platforms. // // Furthermore, many features in the standard library have corresponding -// feature-test macros. The `_LIBCPP_AVAILABILITY_HAS_NO_` macros +// feature-test macros. The `_LIBCPP_AVAILABILITY_HAS_` macros // are checked by the corresponding feature-test macros generated by // generate_feature_test_macro_components.py to ensure that the library // doesn't announce a feature as being implemented if it is unavailable on @@ -74,237 +76,181 @@ // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. -// doesn't support the proper attributes. #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif #endif +// When availability annotations are disabled, we take for granted that features introduced +// in all versions of the library are available. #if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -// These macros control the availability of std::bad_optional_access and -// other exception types. These were put in the shared library to prevent -// code bloat from every user program defining the vtable for these exception -// types. -// -// Note that when exceptions are disabled, the methods that normally throw -// these exceptions can be used even on older deployment targets, but those -// methods will abort instead of throwing. 
-# define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS 1 -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS - -# define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS 1 -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCPP_INTRODUCED_IN_LLVM_4 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP /* nothing */ -# define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST 1 -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST +# define _LIBCPP_INTRODUCED_IN_LLVM_9 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP /* nothing */ -// These macros control the availability of all parts of that -// depend on something in the dylib. -# define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY 1 -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP +# define _LIBCPP_INTRODUCED_IN_LLVM_10 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP /* nothing */ -// This controls the availability of the C++20 synchronization library, -// which requires shared library support for various operations -// (see libcxx/src/atomic.cpp). This includes , , -// , and notification functions on std::atomic. -# define _LIBCPP_AVAILABILITY_HAS_SYNC 1 -# define _LIBCPP_AVAILABILITY_SYNC +# define _LIBCPP_INTRODUCED_IN_LLVM_12 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_12_MARKUP /* nothing */ -// Enable additional explicit instantiations of iostreams components. This -// reduces the number of weak definitions generated in programs that use -// iostreams by providing a single strong definition in the shared library. -// -// TODO: Enable additional explicit instantiations on GCC once it supports exclude_from_explicit_instantiation, -// or once libc++ doesn't use the attribute anymore. -// TODO: Enable them on Windows once https://llvm.org/PR41018 has been fixed. 
-# if !defined(_LIBCPP_COMPILER_GCC) && !defined(_WIN32) -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 1 -# else -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 -# endif +# define _LIBCPP_INTRODUCED_IN_LLVM_14 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP /* nothing */ -// This controls the availability of floating-point std::to_chars functions. -// These overloads were added later than the integer overloads. -# define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT 1 -# define _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT +# define _LIBCPP_INTRODUCED_IN_LLVM_15 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_15_MARKUP /* nothing */ -// This controls whether the library claims to provide a default verbose -// termination function, and consequently whether the headers will try -// to use it when the mechanism isn't overriden at compile-time. -# define _LIBCPP_AVAILABILITY_HAS_VERBOSE_ABORT 1 -# define _LIBCPP_AVAILABILITY_VERBOSE_ABORT +# define _LIBCPP_INTRODUCED_IN_LLVM_16 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP /* nothing */ -// This controls the availability of the C++17 std::pmr library, -// which is implemented in large part in the built library. -# define _LIBCPP_AVAILABILITY_HAS_PMR 1 -# define _LIBCPP_AVAILABILITY_PMR +# define _LIBCPP_INTRODUCED_IN_LLVM_18 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP /* nothing */ -// These macros controls the availability of __cxa_init_primary_exception -// in the built library, which std::make_exception_ptr might use -// (see libcxx/include/__exception/exception_ptr.h). -# define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION 1 -# define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION - -// This controls the availability of C++23 , which -// has a dependency on the built library (it needs access to -// the underlying buffer types of std::cout, std::cerr, and std::clog. 
-# define _LIBCPP_AVAILABILITY_HAS_PRINT 1 -# define _LIBCPP_AVAILABILITY_PRINT - -// This controls the availability of the C++20 time zone database. -// The parser code is built in the library. -# define _LIBCPP_AVAILABILITY_HAS_TZDB 1 -# define _LIBCPP_AVAILABILITY_TZDB - -// These macros determine whether we assume that std::bad_function_call and -// std::bad_expected_access provide a key function in the dylib. This allows -// centralizing their vtable and typeinfo instead of having all TUs provide -// a weak definition that then gets deduplicated. -# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 1 -# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION -# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 1 -# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION +# define _LIBCPP_INTRODUCED_IN_LLVM_19 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP /* nothing */ #elif defined(__APPLE__) -# define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS \ - (!defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) || __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ >= 50000) - -# define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS - -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS __attribute__((availability(watchos, strict, introduced = 5.0))) -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS - -// TODO: Update once this is released -# define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION 0 -# define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION __attribute__((unavailable)) +// LLVM 4 +# if defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 50000 +# define _LIBCPP_INTRODUCED_IN_LLVM_4 0 +# define 
_LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP __attribute__((availability(watchos, strict, introduced = 5.0))) +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_4 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP /* nothing */ +# endif -// +// LLVM 9 // clang-format off # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_9 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP \ + __attribute__((availability(macos, strict, introduced = 10.15))) \ + __attribute__((availability(ios, strict, introduced = 13.0))) \ + __attribute__((availability(tvos, strict, introduced = 13.0))) \ + __attribute__((availability(watchos, strict, introduced = 6.0))) +// clang-format off +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH \ + _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") +// clang-format on # else -# define 
_LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_9 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY \ - __attribute__((availability(macos, strict, introduced = 10.15))) \ - __attribute__((availability(ios, strict, introduced = 13.0))) \ - __attribute__((availability(tvos, strict, introduced = 13.0))) \ - __attribute__((availability(watchos, strict, introduced = 6.0))) + +// LLVM 10 // clang-format off -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH \ - _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) // clang-format on +# define _LIBCPP_INTRODUCED_IN_LLVM_10 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP \ + 
__attribute__((availability(macos, strict, introduced = 11.0))) \ + __attribute__((availability(ios, strict, introduced = 14.0))) \ + __attribute__((availability(tvos, strict, introduced = 14.0))) \ + __attribute__((availability(watchos, strict, introduced = 7.0))) +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_10 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP /* nothing */ +# endif -// std::to_chars(floating-point) +// LLVM 12 // clang-format off -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130300) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160300) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160300) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90300) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 120000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80000) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_12 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_12_MARKUP \ + __attribute__((availability(macos, strict, introduced = 12.0))) \ + __attribute__((availability(ios, strict, introduced = 15.0))) \ + __attribute__((availability(tvos, strict, introduced = 15.0))) \ + __attribute__((availability(watchos, strict, introduced = 8.0))) # else -# define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_12 1 +# define 
_LIBCPP_INTRODUCED_IN_LLVM_12_MARKUP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT \ - __attribute__((availability(macos, strict, introduced = 13.3))) \ - __attribute__((availability(ios, strict, introduced = 16.3))) \ - __attribute__((availability(tvos, strict, introduced = 16.3))) \ - __attribute__((availability(watchos, strict, introduced = 9.3))) -// c++20 synchronization library +// LLVM 14 // clang-format off -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130400) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160500) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160500) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90500) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_SYNC 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_14 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP \ + __attribute__((availability(macos, strict, introduced = 13.4))) \ + __attribute__((availability(ios, strict, introduced = 16.5))) \ + __attribute__((availability(tvos, strict, introduced = 16.5))) \ + __attribute__((availability(watchos, strict, introduced = 9.5))) # else -# define _LIBCPP_AVAILABILITY_HAS_SYNC 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_14 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP /* nothing */ # endif -# 
define _LIBCPP_AVAILABILITY_SYNC \ - __attribute__((availability(macos, strict, introduced = 11.0))) \ - __attribute__((availability(ios, strict, introduced = 14.0))) \ - __attribute__((availability(tvos, strict, introduced = 14.0))) \ - __attribute__((availability(watchos, strict, introduced = 7.0))) - -// __libcpp_verbose_abort -// TODO: Update once this is released -# define _LIBCPP_AVAILABILITY_HAS_VERBOSE_ABORT 0 -# define _LIBCPP_AVAILABILITY_VERBOSE_ABORT __attribute__((unavailable)) - -// std::pmr +// LLVM 15-16 +# define _LIBCPP_INTRODUCED_IN_LLVM_15 _LIBCPP_INTRODUCED_IN_LLVM_16 +# define _LIBCPP_INTRODUCED_IN_LLVM_15_MARKUP _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP // clang-format off # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 140000) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 170000) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 170000) || \ (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 100000) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_PMR 0 -# else -# define _LIBCPP_AVAILABILITY_HAS_PMR 1 -# endif -// TODO: Enable std::pmr markup once https://github.com/llvm/llvm-project/issues/40340 has been fixed -// Until then, it is possible for folks to try to use `std::pmr` when back-deploying to targets that don't support -// it and it'll be a load-time error, but we don't have a good alternative because the library won't compile if we -// use availability annotations until that bug has been fixed. 
-# if 0 -# define _LIBCPP_AVAILABILITY_PMR \ +# define _LIBCPP_INTRODUCED_IN_LLVM_16 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP \ __attribute__((availability(macos, strict, introduced = 14.0))) \ __attribute__((availability(ios, strict, introduced = 17.0))) \ __attribute__((availability(tvos, strict, introduced = 17.0))) \ __attribute__((availability(watchos, strict, introduced = 10.0))) # else -# define _LIBCPP_AVAILABILITY_PMR +# define _LIBCPP_INTRODUCED_IN_LLVM_16 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_HAS_TZDB 0 -# define _LIBCPP_AVAILABILITY_TZDB __attribute__((unavailable)) - -// Warning: This availability macro works differently than the other macros. -// The dylib part of print is not needed on Apple platforms. Therefore when -// the macro is not available the code calling the dylib is commented out. -// The macro _LIBCPP_AVAILABILITY_PRINT is not used. -# define _LIBCPP_AVAILABILITY_HAS_PRINT 0 -# define _LIBCPP_AVAILABILITY_PRINT __attribute__((unavailable)) - -// clang-format off -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 120000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 150000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80000) -// clang-format on -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 +// LLVM 18 +// TODO: Fill this in +# if 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP __attribute__((unavailable)) # else -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18 1 +# define 
_LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 0 -# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION __attribute__((unavailable)) - -# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 0 -# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION __attribute__((unavailable)) +// LLVM 19 +// TODO: Fill this in +# if 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_19 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP __attribute__((unavailable)) +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_19 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP /* nothing */ +# endif #else @@ -315,6 +261,97 @@ #endif +// These macros control the availability of std::bad_optional_access and +// other exception types. These were put in the shared library to prevent +// code bloat from every user program defining the vtable for these exception +// types. +// +// Note that when exceptions are disabled, the methods that normally throw +// these exceptions can be used even on older deployment targets, but those +// methods will abort instead of throwing. +#define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4 +#define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP + +#define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4 +#define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP + +#define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST _LIBCPP_INTRODUCED_IN_LLVM_4 +#define _LIBCPP_AVAILABILITY_BAD_ANY_CAST _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP + +// These macros control the availability of all parts of that +// depend on something in the dylib. 
+#define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9 +#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP +#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH +#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP + +// This controls the availability of the C++20 synchronization library, +// which requires shared library support for various operations +// (see libcxx/src/atomic.cpp). This includes , , +// , and notification functions on std::atomic. +#define _LIBCPP_AVAILABILITY_HAS_SYNC _LIBCPP_INTRODUCED_IN_LLVM_10 +#define _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP + +// Enable additional explicit instantiations of iostreams components. This +// reduces the number of weak definitions generated in programs that use +// iostreams by providing a single strong definition in the shared library. +// +// TODO: Enable additional explicit instantiations on GCC once it supports exclude_from_explicit_instantiation, +// or once libc++ doesn't use the attribute anymore. +// TODO: Enable them on Windows once https://llvm.org/PR41018 has been fixed. +#if !defined(_LIBCPP_COMPILER_GCC) && !defined(_WIN32) +# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 _LIBCPP_INTRODUCED_IN_LLVM_12 +#else +# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 +#endif + +// This controls the availability of floating-point std::to_chars functions. +// These overloads were added later than the integer overloads. 
+#define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_14 +#define _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP + +// This controls whether the library claims to provide a default verbose +// termination function, and consequently whether the headers will try +// to use it when the mechanism isn't overriden at compile-time. +#define _LIBCPP_AVAILABILITY_HAS_VERBOSE_ABORT _LIBCPP_INTRODUCED_IN_LLVM_15 +#define _LIBCPP_AVAILABILITY_VERBOSE_ABORT _LIBCPP_INTRODUCED_IN_LLVM_15_MARKUP + +// This controls the availability of the C++17 std::pmr library, +// which is implemented in large part in the built library. +// +// TODO: Enable std::pmr markup once https://github.com/llvm/llvm-project/issues/40340 has been fixed +// Until then, it is possible for folks to try to use `std::pmr` when back-deploying to targets that don't support +// it and it'll be a load-time error, but we don't have a good alternative because the library won't compile if we +// use availability annotations until that bug has been fixed. +#define _LIBCPP_AVAILABILITY_HAS_PMR _LIBCPP_INTRODUCED_IN_LLVM_16 +#define _LIBCPP_AVAILABILITY_PMR + +// These macros controls the availability of __cxa_init_primary_exception +// in the built library, which std::make_exception_ptr might use +// (see libcxx/include/__exception/exception_ptr.h). +#define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION _LIBCPP_INTRODUCED_IN_LLVM_18 +#define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP + +// This controls the availability of C++23 , which +// has a dependency on the built library (it needs access to +// the underlying buffer types of std::cout, std::cerr, and std::clog. +#define _LIBCPP_AVAILABILITY_HAS_PRINT _LIBCPP_INTRODUCED_IN_LLVM_18 +#define _LIBCPP_AVAILABILITY_PRINT _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP + +// This controls the availability of the C++20 time zone database. 
+// The parser code is built in the library. +#define _LIBCPP_AVAILABILITY_HAS_TZDB _LIBCPP_INTRODUCED_IN_LLVM_19 +#define _LIBCPP_AVAILABILITY_TZDB _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP + +// These macros determine whether we assume that std::bad_function_call and +// std::bad_expected_access provide a key function in the dylib. This allows +// centralizing their vtable and typeinfo instead of having all TUs provide +// a weak definition that then gets deduplicated. +# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19 +# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP +# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19 +# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP + // Define availability attributes that depend on _LIBCPP_HAS_NO_EXCEPTIONS. // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. diff --git a/libcxx/include/__format/escaped_output_table.h b/libcxx/include/__format/escaped_output_table.h index b194f9431c3be3..a4c4c366cf2414 100644 --- a/libcxx/include/__format/escaped_output_table.h +++ b/libcxx/include/__format/escaped_output_table.h @@ -80,10 +80,9 @@ namespace __escaped_output_table { /// The entries of the characters to escape in format's debug string. 
/// /// Contains the entries for [format.string.escaped]/2.2.1.2.1 -/// CE is a Unicode encoding and C corresponds to either a UCS scalar value -/// whose Unicode property General_Category has a value in the groups -/// Separator (Z) or Other (C) or to a UCS scalar value which has the Unicode -/// property Grapheme_Extend=Yes, as described by table 12 of UAX #44 +/// CE is a Unicode encoding and C corresponds to a UCS scalar value whose +/// Unicode property General_Category has a value in the groups Separator (Z) +/// or Other (C), as described by table 12 of UAX #44 /// /// Separator (Z) consists of General_Category /// - Space_Separator, @@ -98,7 +97,6 @@ namespace __escaped_output_table { /// - Unassigned. /// /// The data is generated from -/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// - https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt /// /// The table is similar to the table @@ -110,908 +108,1091 @@ namespace __escaped_output_table { /// - bits [0, 10] The size of the range, allowing 2048 elements. /// - bits [11, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. 
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[894] = { - 0x00000020, - 0x0003f821, - 0x00056800, - 0x0018006f, - 0x001bc001, - 0x001c0003, - 0x001c5800, - 0x001c6800, - 0x001d1000, - 0x00241806, - 0x00298000, - 0x002ab801, - 0x002c5801, - 0x002c802d, - 0x002df800, - 0x002e0801, - 0x002e2001, - 0x002e3808, - 0x002f5803, - 0x002fa810, - 0x0030800a, - 0x0030e000, - 0x00325814, - 0x00338000, - 0x0036b007, - 0x0036f805, - 0x00373801, - 0x00375003, - 0x00387001, - 0x00388800, - 0x0039801c, - 0x003d300a, - 0x003d900d, - 0x003f5808, - 0x003fd802, - 0x0040b003, - 0x0040d808, - 0x00412802, - 0x00414806, - 0x0041f800, - 0x0042c804, - 0x0042f800, - 0x00435804, - 0x00447810, - 0x00465038, - 0x0049d000, - 0x0049e000, - 0x004a0807, - 0x004a6800, - 0x004a8806, - 0x004b1001, - 0x004c0800, - 0x004c2000, - 0x004c6801, - 0x004c8801, - 0x004d4800, - 0x004d8800, - 0x004d9802, - 0x004dd002, - 0x004df000, - 0x004e0805, - 0x004e4801, - 0x004e6800, - 0x004e780c, - 0x004ef000, - 0x004f1003, - 0x004ff004, - 0x00502000, - 0x00505803, - 0x00508801, - 0x00514800, - 0x00518800, - 0x0051a000, - 0x0051b800, - 0x0051d003, - 0x00520817, - 0x0052e800, - 0x0052f806, - 0x00538001, - 0x0053a800, - 0x0053b80b, - 0x00542000, - 0x00547000, - 0x00549000, - 0x00554800, - 0x00558800, - 0x0055a000, - 0x0055d002, - 0x00560807, - 0x00565000, - 0x00566802, - 0x0056880e, - 0x00571003, - 0x00579006, - 0x0057d007, - 0x00582000, - 0x00586801, - 0x00588801, - 0x00594800, - 0x00598800, - 0x0059a000, - 0x0059d002, - 0x0059f001, - 0x005a0805, - 0x005a4801, - 0x005a680e, - 0x005af000, - 0x005b1003, - 0x005bc00a, - 0x005c2000, - 0x005c5802, - 0x005c8800, - 0x005cb002, - 0x005cd800, - 0x005ce800, - 0x005d0002, - 0x005d2802, - 0x005d5802, - 0x005dd004, - 0x005e0000, - 0x005e1802, - 0x005e4800, - 0x005e6802, - 0x005e8814, - 0x005fd805, - 0x00602000, - 0x00606800, - 0x00608800, - 0x00614800, - 0x0061d002, - 0x0061f002, - 0x00622812, - 0x0062d801, - 0x0062f001, - 0x00631003, - 0x00638006, - 0x00640800, - 0x00646800, 
- 0x00648800, - 0x00654800, - 0x0065a000, - 0x0065d002, - 0x0065f800, - 0x00661000, - 0x00662801, - 0x00664800, - 0x00666010, - 0x0066f800, - 0x00671003, - 0x00678000, - 0x0067a00d, - 0x00686800, - 0x00688800, - 0x0069d801, - 0x0069f000, - 0x006a0804, - 0x006a4800, - 0x006a6800, - 0x006a8003, - 0x006ab800, - 0x006b1003, - 0x006c0001, - 0x006c2000, - 0x006cb802, - 0x006d9000, - 0x006de000, - 0x006df001, - 0x006e3808, - 0x006e9005, - 0x006ef806, - 0x006f8001, - 0x006fa80b, - 0x00718800, - 0x0071a00a, - 0x00723807, - 0x0072e024, - 0x00741800, - 0x00742800, - 0x00745800, - 0x00752000, - 0x00753000, - 0x00758800, - 0x0075a008, - 0x0075f001, - 0x00762800, - 0x00763808, - 0x0076d001, - 0x0077001f, - 0x0078c001, - 0x0079a800, - 0x0079b800, - 0x0079c800, - 0x007a4000, - 0x007b6811, - 0x007c0004, - 0x007c3001, - 0x007c6830, - 0x007e3000, - 0x007e6800, - 0x007ed824, - 0x00816803, - 0x00819005, - 0x0081c801, - 0x0081e801, - 0x0082c001, - 0x0082f002, - 0x00838803, - 0x00841000, - 0x00842801, - 0x00846800, - 0x0084e800, - 0x00863000, - 0x00864004, - 0x00867001, - 0x00924800, - 0x00927001, - 0x0092b800, - 0x0092c800, - 0x0092f001, - 0x00944800, - 0x00947001, - 0x00958800, - 0x0095b001, - 0x0095f800, - 0x00960800, - 0x00963001, - 0x0096b800, - 0x00988800, - 0x0098b001, - 0x009ad804, - 0x009be802, - 0x009cd005, - 0x009fb001, - 0x009ff001, - 0x00b40000, - 0x00b4e802, - 0x00b7c806, - 0x00b89002, - 0x00b8b008, - 0x00b99001, - 0x00b9b808, - 0x00ba900d, - 0x00bb6800, - 0x00bb880e, - 0x00bda001, - 0x00bdb806, - 0x00be3000, - 0x00be480a, - 0x00bee802, - 0x00bf5005, - 0x00bfd005, - 0x00c05804, - 0x00c0d005, - 0x00c3c806, - 0x00c42801, - 0x00c54800, - 0x00c55804, - 0x00c7b009, - 0x00c8f803, - 0x00c93801, - 0x00c96003, - 0x00c99000, - 0x00c9c806, - 0x00ca0802, - 0x00cb7001, - 0x00cba80a, - 0x00cd6003, - 0x00ce5005, - 0x00ced802, - 0x00d0b801, - 0x00d0d802, - 0x00d2b000, - 0x00d2c008, - 0x00d31000, - 0x00d32807, - 0x00d3980c, - 0x00d45005, - 0x00d4d005, - 0x00d57055, - 0x00d9a006, - 
0x00d9e000, - 0x00da1000, - 0x00da6802, - 0x00db5808, - 0x00dbf802, - 0x00dd1003, - 0x00dd4001, - 0x00dd5802, - 0x00df3000, - 0x00df4001, - 0x00df6800, - 0x00df7802, - 0x00dfa007, - 0x00e16007, - 0x00e1b004, - 0x00e25002, - 0x00e44806, - 0x00e5d801, - 0x00e6400a, - 0x00e6a00c, - 0x00e71006, - 0x00e76800, - 0x00e7a000, - 0x00e7c001, - 0x00e7d804, - 0x00ee003f, - 0x00f8b001, - 0x00f8f001, - 0x00fa3001, - 0x00fa7001, - 0x00fac000, - 0x00fad000, - 0x00fae000, - 0x00faf000, - 0x00fbf001, - 0x00fda800, - 0x00fe2800, - 0x00fea001, - 0x00fee000, - 0x00ff8001, - 0x00ffa800, - 0x00fff810, - 0x01014007, - 0x0102f810, - 0x01039001, - 0x01047800, - 0x0104e802, - 0x0106083e, - 0x010c6003, - 0x01213818, - 0x01225814, - 0x015ba001, - 0x015cb000, - 0x01677802, - 0x0167a004, - 0x01693000, - 0x01694004, - 0x01697001, - 0x016b4006, - 0x016b880e, - 0x016cb808, - 0x016d3800, - 0x016d7800, - 0x016db800, - 0x016df800, - 0x016e3800, - 0x016e7800, - 0x016eb800, - 0x016ef820, - 0x0172f021, - 0x0174d000, - 0x0177a00b, - 0x017eb019, - 0x01800000, - 0x01815005, - 0x01820000, - 0x0184b803, - 0x01880004, - 0x01898000, - 0x018c7800, - 0x018f200a, - 0x0190f800, - 0x05246802, - 0x05263808, - 0x05316013, - 0x05337803, - 0x0533a009, - 0x0534f001, - 0x05378001, - 0x0537c007, - 0x053e5804, - 0x053e9000, - 0x053ea000, - 0x053ed017, - 0x05401000, - 0x05403000, - 0x05405800, - 0x05412801, - 0x05416003, - 0x0541d005, - 0x0543c007, - 0x05462009, - 0x0546d017, - 0x0547f800, - 0x05493007, - 0x054a380a, - 0x054aa00a, - 0x054be805, - 0x054d9800, - 0x054db003, - 0x054de001, - 0x054e7000, - 0x054ed003, - 0x054f2800, - 0x054ff800, - 0x05514805, - 0x05518801, - 0x0551a80a, - 0x05521800, - 0x05526000, - 0x05527001, - 0x0552d001, - 0x0553e000, - 0x05558000, - 0x05559002, - 0x0555b801, - 0x0555f001, - 0x05560800, - 0x05561817, - 0x05576001, - 0x0557b00a, - 0x05583801, - 0x05587801, - 0x0558b808, - 0x05593800, - 0x05597800, - 0x055b6003, - 0x055f2800, - 0x055f4000, - 0x055f6802, - 0x055fd005, - 0x06bd200b, - 0x06be3803, 
- 0x06bfe7ff, - 0x06ffe7ff, - 0x073fe7ff, - 0x077fe7ff, - 0x07bfe103, - 0x07d37001, - 0x07d6d025, - 0x07d8380b, - 0x07d8c004, - 0x07d8f000, - 0x07d9b800, - 0x07d9e800, - 0x07d9f800, - 0x07da1000, - 0x07da2800, - 0x07de180f, - 0x07ec8001, - 0x07ee4006, - 0x07ee801f, - 0x07f0000f, - 0x07f0d015, - 0x07f29800, - 0x07f33800, - 0x07f36003, - 0x07f3a800, - 0x07f7e803, - 0x07fcf001, - 0x07fdf802, - 0x07fe4001, - 0x07fe8001, - 0x07fec001, - 0x07fee802, - 0x07ff3800, - 0x07ff780c, - 0x07fff001, - 0x08006000, - 0x08013800, - 0x0801d800, - 0x0801f000, - 0x08027001, - 0x0802f021, - 0x0807d804, - 0x08081803, - 0x0809a002, - 0x080c7800, - 0x080ce802, - 0x080d082e, - 0x080fe882, - 0x0814e802, - 0x0816880f, - 0x0817e003, - 0x08192008, - 0x081a5804, - 0x081bb009, - 0x081cf000, - 0x081e2003, - 0x081eb029, - 0x0824f001, - 0x08255005, - 0x0826a003, - 0x0827e003, - 0x08294007, - 0x082b200a, - 0x082bd800, - 0x082c5800, - 0x082c9800, - 0x082cb000, - 0x082d1000, - 0x082d9000, - 0x082dd000, - 0x082de842, - 0x0839b808, - 0x083ab009, - 0x083b4017, - 0x083c3000, - 0x083d8800, - 0x083dd844, - 0x08403001, - 0x08404800, - 0x0841b000, - 0x0841c802, - 0x0841e801, - 0x0842b000, - 0x0844f807, - 0x0845802f, - 0x08479800, - 0x0847b004, - 0x0848e002, - 0x0849d004, - 0x084a003f, - 0x084dc003, - 0x084e8001, - 0x0850080e, - 0x0850a000, - 0x0850c000, - 0x0851b009, - 0x08524806, - 0x0852c806, - 0x0855001f, - 0x08572805, - 0x0857b808, - 0x0859b002, - 0x085ab001, - 0x085b9804, - 0x085c9006, - 0x085ce80b, - 0x085d804f, - 0x08624836, - 0x0865980c, - 0x08679806, - 0x0869200b, - 0x0869d125, - 0x0873f800, - 0x08755002, - 0x08757001, - 0x0875904d, - 0x08794007, - 0x087a300a, - 0x087ad015, - 0x087c1003, - 0x087c5025, - 0x087e6013, - 0x087fb808, - 0x08800800, - 0x0881c00e, - 0x08827003, - 0x08838000, - 0x08839801, - 0x0883b00b, - 0x08859803, - 0x0885c801, - 0x0885e800, - 0x0886100d, - 0x08874806, - 0x0887d008, - 0x08893804, - 0x08896808, - 0x088a4007, - 0x088b9800, - 0x088bb80a, - 0x088db008, - 0x088e4803, - 
0x088e7800, - 0x088f0000, - 0x088fa80a, - 0x08909000, - 0x08917802, - 0x0891a000, - 0x0891b001, - 0x0891f000, - 0x0892083e, - 0x08943800, - 0x08944800, - 0x08947000, - 0x0894f000, - 0x08955005, - 0x0896f800, - 0x0897180c, - 0x0897d007, - 0x08982000, - 0x08986801, - 0x08988801, - 0x08994800, - 0x08998800, - 0x0899a000, - 0x0899d002, - 0x0899f000, - 0x089a0000, - 0x089a2801, - 0x089a4801, - 0x089a7001, - 0x089a880b, - 0x089b209b, - 0x08a1c007, - 0x08a21002, - 0x08a23000, - 0x08a2e000, - 0x08a2f000, - 0x08a3101d, - 0x08a58000, - 0x08a59805, - 0x08a5d000, - 0x08a5e800, - 0x08a5f801, - 0x08a61001, - 0x08a64007, - 0x08a6d0a5, - 0x08ad7800, - 0x08ad9005, - 0x08ade001, - 0x08adf801, - 0x08aee023, - 0x08b19807, - 0x08b1e800, - 0x08b1f801, - 0x08b2280a, - 0x08b2d005, - 0x08b36812, - 0x08b55800, - 0x08b56800, - 0x08b58005, - 0x08b5b800, - 0x08b5d005, - 0x08b65035, - 0x08b8d804, - 0x08b91003, - 0x08b93808, - 0x08ba38b8, - 0x08c17808, - 0x08c1c801, - 0x08c1e063, - 0x08c7980b, - 0x08c83801, - 0x08c85001, - 0x08c8a000, - 0x08c8b800, - 0x08c98000, - 0x08c9b000, - 0x08c9c803, - 0x08c9f000, - 0x08ca1800, - 0x08ca3808, - 0x08cad045, - 0x08cd4001, - 0x08cea007, - 0x08cf0000, - 0x08cf281a, - 0x08d00809, - 0x08d19805, - 0x08d1d803, - 0x08d23808, - 0x08d28805, - 0x08d2c802, - 0x08d4500c, - 0x08d4c001, - 0x08d5180c, - 0x08d7c806, - 0x08d850f5, - 0x08e04800, - 0x08e1800d, - 0x08e1f800, - 0x08e23009, - 0x08e36802, - 0x08e48018, - 0x08e55006, - 0x08e59001, - 0x08e5a84a, - 0x08e83800, - 0x08e85000, - 0x08e98814, - 0x08ea3808, - 0x08ead005, - 0x08eb3000, - 0x08eb4800, - 0x08ec7803, - 0x08eca800, - 0x08ecb800, - 0x08ecc806, - 0x08ed5135, - 0x08f79801, - 0x08f7c808, - 0x08f88800, - 0x08f9b007, - 0x08fa0000, - 0x08fa1000, - 0x08fad055, - 0x08fd880e, - 0x08ff900c, - 0x091cd065, - 0x09237800, - 0x0923a80a, - 0x092a27ff, - 0x096a224b, - 0x097f980c, - 0x09a18010, - 0x09a23fff, - 0x09e23fb8, - 0x0a323fff, - 0x0a723fff, - 0x0ab23fff, - 0x0af23fff, - 0x0b3239b8, - 0x0b51c806, - 0x0b52f800, - 0x0b535003, 
- 0x0b55f800, - 0x0b565005, - 0x0b577006, - 0x0b57b009, - 0x0b598006, - 0x0b5a3009, - 0x0b5ad000, - 0x0b5b1000, - 0x0b5bc004, - 0x0b5c82af, - 0x0b74d864, - 0x0b7a5804, - 0x0b7c400a, - 0x0b7d003f, - 0x0b7f200b, - 0x0b7f900d, - 0x0c3fc007, - 0x0c66b029, - 0x0c684fff, - 0x0ca84fff, - 0x0ce84fff, - 0x0d284fff, - 0x0d684ae6, - 0x0d7fa000, - 0x0d7fe000, - 0x0d7ff800, - 0x0d89180e, - 0x0d89981c, - 0x0d8a9801, - 0x0d8ab00d, - 0x0d8b4007, - 0x0d97e7ff, - 0x0dd7e103, - 0x0de35804, - 0x0de3e802, - 0x0de44806, - 0x0de4d001, - 0x0de4e801, - 0x0de507ff, - 0x0e2507ff, - 0x0e6502af, - 0x0e7e203b, - 0x0e87b009, - 0x0e893801, - 0x0e8b2800, - 0x0e8b3802, - 0x0e8b7014, - 0x0e8c2806, - 0x0e8d5003, - 0x0e8f5814, - 0x0e921002, - 0x0e923079, - 0x0e96a00b, - 0x0e97a00b, - 0x0e9ab808, - 0x0e9bc886, - 0x0ea2a800, - 0x0ea4e800, - 0x0ea50001, - 0x0ea51801, - 0x0ea53801, - 0x0ea56800, - 0x0ea5d000, - 0x0ea5e000, - 0x0ea62000, - 0x0ea83000, - 0x0ea85801, - 0x0ea8a800, - 0x0ea8e800, - 0x0ea9d000, - 0x0ea9f800, - 0x0eaa2800, - 0x0eaa3802, - 0x0eaa8800, - 0x0eb53001, - 0x0ebe6001, - 0x0ed00036, - 0x0ed1d831, - 0x0ed3a800, - 0x0ed42000, - 0x0ed46473, - 0x0ef8f805, - 0x0ef95904, - 0x0f037091, - 0x0f096809, - 0x0f09f001, - 0x0f0a5003, - 0x0f0a813f, - 0x0f157011, - 0x0f176003, - 0x0f17d004, - 0x0f1801cf, - 0x0f276003, - 0x0f27d2e5, - 0x0f3f3800, - 0x0f3f6000, - 0x0f3f7800, - 0x0f3ff800, - 0x0f462801, - 0x0f46802f, - 0x0f4a2006, - 0x0f4a6003, - 0x0f4ad003, - 0x0f4b0310, - 0x0f65a84b, - 0x0f69f0c1, - 0x0f702000, - 0x0f710000, - 0x0f711800, - 0x0f712801, - 0x0f714000, - 0x0f719800, - 0x0f71c000, - 0x0f71d000, - 0x0f71e005, - 0x0f721803, - 0x0f724000, - 0x0f725000, - 0x0f726000, - 0x0f728000, - 0x0f729800, - 0x0f72a801, - 0x0f72c000, - 0x0f72d000, - 0x0f72e000, - 0x0f72f000, - 0x0f730000, - 0x0f731800, - 0x0f732801, - 0x0f735800, - 0x0f739800, - 0x0f73c000, - 0x0f73e800, - 0x0f73f800, - 0x0f745000, - 0x0f74e004, - 0x0f752000, - 0x0f755000, - 0x0f75e033, - 0x0f77910d, - 0x0f816003, - 0x0f84a00b, - 
0x0f857801, - 0x0f860000, - 0x0f868000, - 0x0f87b009, - 0x0f8d7037, - 0x0f90180c, - 0x0f91e003, - 0x0f924806, - 0x0f92900d, - 0x0f933099, - 0x0fb6c003, - 0x0fb76802, - 0x0fb7e802, - 0x0fbbb803, - 0x0fbed005, - 0x0fbf6003, - 0x0fbf880e, - 0x0fc06003, - 0x0fc24007, - 0x0fc2d005, - 0x0fc44007, - 0x0fc57001, - 0x0fc5904d, - 0x0fd2a00b, - 0x0fd37001, - 0x0fd3e802, - 0x0fd44806, - 0x0fd5f000, - 0x0fd63007, - 0x0fd6e003, - 0x0fd74806, - 0x0fd7c806, - 0x0fdc9800, - 0x0fde5824, - 0x0fdfd405, - 0x1537001f, - 0x15b9d005, - 0x15c0f001, - 0x1675100d, - 0x175f080e, - 0x1772f7ff, - 0x17b2f1a1, - 0x17d0f5e1, - 0x189a5804}; +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1077] = { + 0x00000020 /* 00000000 - 00000020 [ 33] */, + 0x0003f821 /* 0000007f - 000000a0 [ 34] */, + 0x00056800 /* 000000ad - 000000ad [ 1] */, + 0x001bc001 /* 00000378 - 00000379 [ 2] */, + 0x001c0003 /* 00000380 - 00000383 [ 4] */, + 0x001c5800 /* 0000038b - 0000038b [ 1] */, + 0x001c6800 /* 0000038d - 0000038d [ 1] */, + 0x001d1000 /* 000003a2 - 000003a2 [ 1] */, + 0x00298000 /* 00000530 - 00000530 [ 1] */, + 0x002ab801 /* 00000557 - 00000558 [ 2] */, + 0x002c5801 /* 0000058b - 0000058c [ 2] */, + 0x002c8000 /* 00000590 - 00000590 [ 1] */, + 0x002e4007 /* 000005c8 - 000005cf [ 8] */, + 0x002f5803 /* 000005eb - 000005ee [ 4] */, + 0x002fa810 /* 000005f5 - 00000605 [ 17] */, + 0x0030e000 /* 0000061c - 0000061c [ 1] */, + 0x0036e800 /* 000006dd - 000006dd [ 1] */, + 0x00387001 /* 0000070e - 0000070f [ 2] */, + 0x003a5801 /* 0000074b - 0000074c [ 2] */, + 0x003d900d /* 000007b2 - 000007bf [ 14] */, + 0x003fd801 /* 000007fb - 000007fc [ 2] */, + 0x00417001 /* 0000082e - 0000082f [ 2] */, + 0x0041f800 /* 0000083f - 0000083f [ 1] */, + 0x0042e001 /* 0000085c - 0000085d [ 2] */, + 0x0042f800 /* 0000085f - 0000085f [ 1] */, + 0x00435804 /* 0000086b - 0000086f [ 5] */, + 0x00447808 /* 0000088f - 00000897 [ 9] */, + 0x00471000 /* 000008e2 - 000008e2 [ 1] */, + 0x004c2000 /* 00000984 - 00000984 [ 1] */, + 
0x004c6801 /* 0000098d - 0000098e [ 2] */, + 0x004c8801 /* 00000991 - 00000992 [ 2] */, + 0x004d4800 /* 000009a9 - 000009a9 [ 1] */, + 0x004d8800 /* 000009b1 - 000009b1 [ 1] */, + 0x004d9802 /* 000009b3 - 000009b5 [ 3] */, + 0x004dd001 /* 000009ba - 000009bb [ 2] */, + 0x004e2801 /* 000009c5 - 000009c6 [ 2] */, + 0x004e4801 /* 000009c9 - 000009ca [ 2] */, + 0x004e7807 /* 000009cf - 000009d6 [ 8] */, + 0x004ec003 /* 000009d8 - 000009db [ 4] */, + 0x004ef000 /* 000009de - 000009de [ 1] */, + 0x004f2001 /* 000009e4 - 000009e5 [ 2] */, + 0x004ff801 /* 000009ff - 00000a00 [ 2] */, + 0x00502000 /* 00000a04 - 00000a04 [ 1] */, + 0x00505803 /* 00000a0b - 00000a0e [ 4] */, + 0x00508801 /* 00000a11 - 00000a12 [ 2] */, + 0x00514800 /* 00000a29 - 00000a29 [ 1] */, + 0x00518800 /* 00000a31 - 00000a31 [ 1] */, + 0x0051a000 /* 00000a34 - 00000a34 [ 1] */, + 0x0051b800 /* 00000a37 - 00000a37 [ 1] */, + 0x0051d001 /* 00000a3a - 00000a3b [ 2] */, + 0x0051e800 /* 00000a3d - 00000a3d [ 1] */, + 0x00521803 /* 00000a43 - 00000a46 [ 4] */, + 0x00524801 /* 00000a49 - 00000a4a [ 2] */, + 0x00527002 /* 00000a4e - 00000a50 [ 3] */, + 0x00529006 /* 00000a52 - 00000a58 [ 7] */, + 0x0052e800 /* 00000a5d - 00000a5d [ 1] */, + 0x0052f806 /* 00000a5f - 00000a65 [ 7] */, + 0x0053b809 /* 00000a77 - 00000a80 [ 10] */, + 0x00542000 /* 00000a84 - 00000a84 [ 1] */, + 0x00547000 /* 00000a8e - 00000a8e [ 1] */, + 0x00549000 /* 00000a92 - 00000a92 [ 1] */, + 0x00554800 /* 00000aa9 - 00000aa9 [ 1] */, + 0x00558800 /* 00000ab1 - 00000ab1 [ 1] */, + 0x0055a000 /* 00000ab4 - 00000ab4 [ 1] */, + 0x0055d001 /* 00000aba - 00000abb [ 2] */, + 0x00563000 /* 00000ac6 - 00000ac6 [ 1] */, + 0x00565000 /* 00000aca - 00000aca [ 1] */, + 0x00567001 /* 00000ace - 00000acf [ 2] */, + 0x0056880e /* 00000ad1 - 00000adf [ 15] */, + 0x00572001 /* 00000ae4 - 00000ae5 [ 2] */, + 0x00579006 /* 00000af2 - 00000af8 [ 7] */, + 0x00580000 /* 00000b00 - 00000b00 [ 1] */, + 0x00582000 /* 00000b04 - 00000b04 [ 1] */, + 0x00586801 /* 
00000b0d - 00000b0e [ 2] */, + 0x00588801 /* 00000b11 - 00000b12 [ 2] */, + 0x00594800 /* 00000b29 - 00000b29 [ 1] */, + 0x00598800 /* 00000b31 - 00000b31 [ 1] */, + 0x0059a000 /* 00000b34 - 00000b34 [ 1] */, + 0x0059d001 /* 00000b3a - 00000b3b [ 2] */, + 0x005a2801 /* 00000b45 - 00000b46 [ 2] */, + 0x005a4801 /* 00000b49 - 00000b4a [ 2] */, + 0x005a7006 /* 00000b4e - 00000b54 [ 7] */, + 0x005ac003 /* 00000b58 - 00000b5b [ 4] */, + 0x005af000 /* 00000b5e - 00000b5e [ 1] */, + 0x005b2001 /* 00000b64 - 00000b65 [ 2] */, + 0x005bc009 /* 00000b78 - 00000b81 [ 10] */, + 0x005c2000 /* 00000b84 - 00000b84 [ 1] */, + 0x005c5802 /* 00000b8b - 00000b8d [ 3] */, + 0x005c8800 /* 00000b91 - 00000b91 [ 1] */, + 0x005cb002 /* 00000b96 - 00000b98 [ 3] */, + 0x005cd800 /* 00000b9b - 00000b9b [ 1] */, + 0x005ce800 /* 00000b9d - 00000b9d [ 1] */, + 0x005d0002 /* 00000ba0 - 00000ba2 [ 3] */, + 0x005d2802 /* 00000ba5 - 00000ba7 [ 3] */, + 0x005d5802 /* 00000bab - 00000bad [ 3] */, + 0x005dd003 /* 00000bba - 00000bbd [ 4] */, + 0x005e1802 /* 00000bc3 - 00000bc5 [ 3] */, + 0x005e4800 /* 00000bc9 - 00000bc9 [ 1] */, + 0x005e7001 /* 00000bce - 00000bcf [ 2] */, + 0x005e8805 /* 00000bd1 - 00000bd6 [ 6] */, + 0x005ec00d /* 00000bd8 - 00000be5 [ 14] */, + 0x005fd804 /* 00000bfb - 00000bff [ 5] */, + 0x00606800 /* 00000c0d - 00000c0d [ 1] */, + 0x00608800 /* 00000c11 - 00000c11 [ 1] */, + 0x00614800 /* 00000c29 - 00000c29 [ 1] */, + 0x0061d001 /* 00000c3a - 00000c3b [ 2] */, + 0x00622800 /* 00000c45 - 00000c45 [ 1] */, + 0x00624800 /* 00000c49 - 00000c49 [ 1] */, + 0x00627006 /* 00000c4e - 00000c54 [ 7] */, + 0x0062b800 /* 00000c57 - 00000c57 [ 1] */, + 0x0062d801 /* 00000c5b - 00000c5c [ 2] */, + 0x0062f001 /* 00000c5e - 00000c5f [ 2] */, + 0x00632001 /* 00000c64 - 00000c65 [ 2] */, + 0x00638006 /* 00000c70 - 00000c76 [ 7] */, + 0x00646800 /* 00000c8d - 00000c8d [ 1] */, + 0x00648800 /* 00000c91 - 00000c91 [ 1] */, + 0x00654800 /* 00000ca9 - 00000ca9 [ 1] */, + 0x0065a000 /* 00000cb4 - 
00000cb4 [ 1] */, + 0x0065d001 /* 00000cba - 00000cbb [ 2] */, + 0x00662800 /* 00000cc5 - 00000cc5 [ 1] */, + 0x00664800 /* 00000cc9 - 00000cc9 [ 1] */, + 0x00667006 /* 00000cce - 00000cd4 [ 7] */, + 0x0066b805 /* 00000cd7 - 00000cdc [ 6] */, + 0x0066f800 /* 00000cdf - 00000cdf [ 1] */, + 0x00672001 /* 00000ce4 - 00000ce5 [ 2] */, + 0x00678000 /* 00000cf0 - 00000cf0 [ 1] */, + 0x0067a00b /* 00000cf4 - 00000cff [ 12] */, + 0x00686800 /* 00000d0d - 00000d0d [ 1] */, + 0x00688800 /* 00000d11 - 00000d11 [ 1] */, + 0x006a2800 /* 00000d45 - 00000d45 [ 1] */, + 0x006a4800 /* 00000d49 - 00000d49 [ 1] */, + 0x006a8003 /* 00000d50 - 00000d53 [ 4] */, + 0x006b2001 /* 00000d64 - 00000d65 [ 2] */, + 0x006c0000 /* 00000d80 - 00000d80 [ 1] */, + 0x006c2000 /* 00000d84 - 00000d84 [ 1] */, + 0x006cb802 /* 00000d97 - 00000d99 [ 3] */, + 0x006d9000 /* 00000db2 - 00000db2 [ 1] */, + 0x006de000 /* 00000dbc - 00000dbc [ 1] */, + 0x006df001 /* 00000dbe - 00000dbf [ 2] */, + 0x006e3802 /* 00000dc7 - 00000dc9 [ 3] */, + 0x006e5803 /* 00000dcb - 00000dce [ 4] */, + 0x006ea800 /* 00000dd5 - 00000dd5 [ 1] */, + 0x006eb800 /* 00000dd7 - 00000dd7 [ 1] */, + 0x006f0005 /* 00000de0 - 00000de5 [ 6] */, + 0x006f8001 /* 00000df0 - 00000df1 [ 2] */, + 0x006fa80b /* 00000df5 - 00000e00 [ 12] */, + 0x0071d803 /* 00000e3b - 00000e3e [ 4] */, + 0x0072e024 /* 00000e5c - 00000e80 [ 37] */, + 0x00741800 /* 00000e83 - 00000e83 [ 1] */, + 0x00742800 /* 00000e85 - 00000e85 [ 1] */, + 0x00745800 /* 00000e8b - 00000e8b [ 1] */, + 0x00752000 /* 00000ea4 - 00000ea4 [ 1] */, + 0x00753000 /* 00000ea6 - 00000ea6 [ 1] */, + 0x0075f001 /* 00000ebe - 00000ebf [ 2] */, + 0x00762800 /* 00000ec5 - 00000ec5 [ 1] */, + 0x00763800 /* 00000ec7 - 00000ec7 [ 1] */, + 0x00767800 /* 00000ecf - 00000ecf [ 1] */, + 0x0076d001 /* 00000eda - 00000edb [ 2] */, + 0x0077001f /* 00000ee0 - 00000eff [ 32] */, + 0x007a4000 /* 00000f48 - 00000f48 [ 1] */, + 0x007b6803 /* 00000f6d - 00000f70 [ 4] */, + 0x007cc000 /* 00000f98 - 00000f98 [ 1] 
*/, + 0x007de800 /* 00000fbd - 00000fbd [ 1] */, + 0x007e6800 /* 00000fcd - 00000fcd [ 1] */, + 0x007ed824 /* 00000fdb - 00000fff [ 37] */, + 0x00863000 /* 000010c6 - 000010c6 [ 1] */, + 0x00864004 /* 000010c8 - 000010cc [ 5] */, + 0x00867001 /* 000010ce - 000010cf [ 2] */, + 0x00924800 /* 00001249 - 00001249 [ 1] */, + 0x00927001 /* 0000124e - 0000124f [ 2] */, + 0x0092b800 /* 00001257 - 00001257 [ 1] */, + 0x0092c800 /* 00001259 - 00001259 [ 1] */, + 0x0092f001 /* 0000125e - 0000125f [ 2] */, + 0x00944800 /* 00001289 - 00001289 [ 1] */, + 0x00947001 /* 0000128e - 0000128f [ 2] */, + 0x00958800 /* 000012b1 - 000012b1 [ 1] */, + 0x0095b001 /* 000012b6 - 000012b7 [ 2] */, + 0x0095f800 /* 000012bf - 000012bf [ 1] */, + 0x00960800 /* 000012c1 - 000012c1 [ 1] */, + 0x00963001 /* 000012c6 - 000012c7 [ 2] */, + 0x0096b800 /* 000012d7 - 000012d7 [ 1] */, + 0x00988800 /* 00001311 - 00001311 [ 1] */, + 0x0098b001 /* 00001316 - 00001317 [ 2] */, + 0x009ad801 /* 0000135b - 0000135c [ 2] */, + 0x009be802 /* 0000137d - 0000137f [ 3] */, + 0x009cd005 /* 0000139a - 0000139f [ 6] */, + 0x009fb001 /* 000013f6 - 000013f7 [ 2] */, + 0x009ff001 /* 000013fe - 000013ff [ 2] */, + 0x00b40000 /* 00001680 - 00001680 [ 1] */, + 0x00b4e802 /* 0000169d - 0000169f [ 3] */, + 0x00b7c806 /* 000016f9 - 000016ff [ 7] */, + 0x00b8b008 /* 00001716 - 0000171e [ 9] */, + 0x00b9b808 /* 00001737 - 0000173f [ 9] */, + 0x00baa00b /* 00001754 - 0000175f [ 12] */, + 0x00bb6800 /* 0000176d - 0000176d [ 1] */, + 0x00bb8800 /* 00001771 - 00001771 [ 1] */, + 0x00bba00b /* 00001774 - 0000177f [ 12] */, + 0x00bef001 /* 000017de - 000017df [ 2] */, + 0x00bf5005 /* 000017ea - 000017ef [ 6] */, + 0x00bfd005 /* 000017fa - 000017ff [ 6] */, + 0x00c07000 /* 0000180e - 0000180e [ 1] */, + 0x00c0d005 /* 0000181a - 0000181f [ 6] */, + 0x00c3c806 /* 00001879 - 0000187f [ 7] */, + 0x00c55804 /* 000018ab - 000018af [ 5] */, + 0x00c7b009 /* 000018f6 - 000018ff [ 10] */, + 0x00c8f800 /* 0000191f - 0000191f [ 1] */, + 
0x00c96003 /* 0000192c - 0000192f [ 4] */, + 0x00c9e003 /* 0000193c - 0000193f [ 4] */, + 0x00ca0802 /* 00001941 - 00001943 [ 3] */, + 0x00cb7001 /* 0000196e - 0000196f [ 2] */, + 0x00cba80a /* 00001975 - 0000197f [ 11] */, + 0x00cd6003 /* 000019ac - 000019af [ 4] */, + 0x00ce5005 /* 000019ca - 000019cf [ 6] */, + 0x00ced802 /* 000019db - 000019dd [ 3] */, + 0x00d0e001 /* 00001a1c - 00001a1d [ 2] */, + 0x00d2f800 /* 00001a5f - 00001a5f [ 1] */, + 0x00d3e801 /* 00001a7d - 00001a7e [ 2] */, + 0x00d45005 /* 00001a8a - 00001a8f [ 6] */, + 0x00d4d005 /* 00001a9a - 00001a9f [ 6] */, + 0x00d57001 /* 00001aae - 00001aaf [ 2] */, + 0x00d67830 /* 00001acf - 00001aff [ 49] */, + 0x00da6802 /* 00001b4d - 00001b4f [ 3] */, + 0x00dbf800 /* 00001b7f - 00001b7f [ 1] */, + 0x00dfa007 /* 00001bf4 - 00001bfb [ 8] */, + 0x00e1c002 /* 00001c38 - 00001c3a [ 3] */, + 0x00e25002 /* 00001c4a - 00001c4c [ 3] */, + 0x00e44806 /* 00001c89 - 00001c8f [ 7] */, + 0x00e5d801 /* 00001cbb - 00001cbc [ 2] */, + 0x00e64007 /* 00001cc8 - 00001ccf [ 8] */, + 0x00e7d804 /* 00001cfb - 00001cff [ 5] */, + 0x00f8b001 /* 00001f16 - 00001f17 [ 2] */, + 0x00f8f001 /* 00001f1e - 00001f1f [ 2] */, + 0x00fa3001 /* 00001f46 - 00001f47 [ 2] */, + 0x00fa7001 /* 00001f4e - 00001f4f [ 2] */, + 0x00fac000 /* 00001f58 - 00001f58 [ 1] */, + 0x00fad000 /* 00001f5a - 00001f5a [ 1] */, + 0x00fae000 /* 00001f5c - 00001f5c [ 1] */, + 0x00faf000 /* 00001f5e - 00001f5e [ 1] */, + 0x00fbf001 /* 00001f7e - 00001f7f [ 2] */, + 0x00fda800 /* 00001fb5 - 00001fb5 [ 1] */, + 0x00fe2800 /* 00001fc5 - 00001fc5 [ 1] */, + 0x00fea001 /* 00001fd4 - 00001fd5 [ 2] */, + 0x00fee000 /* 00001fdc - 00001fdc [ 1] */, + 0x00ff8001 /* 00001ff0 - 00001ff1 [ 2] */, + 0x00ffa800 /* 00001ff5 - 00001ff5 [ 1] */, + 0x00fff810 /* 00001fff - 0000200f [ 17] */, + 0x01014007 /* 00002028 - 0000202f [ 8] */, + 0x0102f810 /* 0000205f - 0000206f [ 17] */, + 0x01039001 /* 00002072 - 00002073 [ 2] */, + 0x01047800 /* 0000208f - 0000208f [ 1] */, + 0x0104e802 /* 
0000209d - 0000209f [ 3] */, + 0x0106080e /* 000020c1 - 000020cf [ 15] */, + 0x0107880e /* 000020f1 - 000020ff [ 15] */, + 0x010c6003 /* 0000218c - 0000218f [ 4] */, + 0x01213818 /* 00002427 - 0000243f [ 25] */, + 0x01225814 /* 0000244b - 0000245f [ 21] */, + 0x015ba001 /* 00002b74 - 00002b75 [ 2] */, + 0x015cb000 /* 00002b96 - 00002b96 [ 1] */, + 0x0167a004 /* 00002cf4 - 00002cf8 [ 5] */, + 0x01693000 /* 00002d26 - 00002d26 [ 1] */, + 0x01694004 /* 00002d28 - 00002d2c [ 5] */, + 0x01697001 /* 00002d2e - 00002d2f [ 2] */, + 0x016b4006 /* 00002d68 - 00002d6e [ 7] */, + 0x016b880d /* 00002d71 - 00002d7e [ 14] */, + 0x016cb808 /* 00002d97 - 00002d9f [ 9] */, + 0x016d3800 /* 00002da7 - 00002da7 [ 1] */, + 0x016d7800 /* 00002daf - 00002daf [ 1] */, + 0x016db800 /* 00002db7 - 00002db7 [ 1] */, + 0x016df800 /* 00002dbf - 00002dbf [ 1] */, + 0x016e3800 /* 00002dc7 - 00002dc7 [ 1] */, + 0x016e7800 /* 00002dcf - 00002dcf [ 1] */, + 0x016eb800 /* 00002dd7 - 00002dd7 [ 1] */, + 0x016ef800 /* 00002ddf - 00002ddf [ 1] */, + 0x0172f021 /* 00002e5e - 00002e7f [ 34] */, + 0x0174d000 /* 00002e9a - 00002e9a [ 1] */, + 0x0177a00b /* 00002ef4 - 00002eff [ 12] */, + 0x017eb019 /* 00002fd6 - 00002fef [ 26] */, + 0x01800000 /* 00003000 - 00003000 [ 1] */, + 0x01820000 /* 00003040 - 00003040 [ 1] */, + 0x0184b801 /* 00003097 - 00003098 [ 2] */, + 0x01880004 /* 00003100 - 00003104 [ 5] */, + 0x01898000 /* 00003130 - 00003130 [ 1] */, + 0x018c7800 /* 0000318f - 0000318f [ 1] */, + 0x018f200a /* 000031e4 - 000031ee [ 11] */, + 0x0190f800 /* 0000321f - 0000321f [ 1] */, + 0x05246802 /* 0000a48d - 0000a48f [ 3] */, + 0x05263808 /* 0000a4c7 - 0000a4cf [ 9] */, + 0x05316013 /* 0000a62c - 0000a63f [ 20] */, + 0x0537c007 /* 0000a6f8 - 0000a6ff [ 8] */, + 0x053e5804 /* 0000a7cb - 0000a7cf [ 5] */, + 0x053e9000 /* 0000a7d2 - 0000a7d2 [ 1] */, + 0x053ea000 /* 0000a7d4 - 0000a7d4 [ 1] */, + 0x053ed017 /* 0000a7da - 0000a7f1 [ 24] */, + 0x05416802 /* 0000a82d - 0000a82f [ 3] */, + 0x0541d005 /* 0000a83a 
- 0000a83f [ 6] */, + 0x0543c007 /* 0000a878 - 0000a87f [ 8] */, + 0x05463007 /* 0000a8c6 - 0000a8cd [ 8] */, + 0x0546d005 /* 0000a8da - 0000a8df [ 6] */, + 0x054aa00a /* 0000a954 - 0000a95e [ 11] */, + 0x054be802 /* 0000a97d - 0000a97f [ 3] */, + 0x054e7000 /* 0000a9ce - 0000a9ce [ 1] */, + 0x054ed003 /* 0000a9da - 0000a9dd [ 4] */, + 0x054ff800 /* 0000a9ff - 0000a9ff [ 1] */, + 0x0551b808 /* 0000aa37 - 0000aa3f [ 9] */, + 0x05527001 /* 0000aa4e - 0000aa4f [ 2] */, + 0x0552d001 /* 0000aa5a - 0000aa5b [ 2] */, + 0x05561817 /* 0000aac3 - 0000aada [ 24] */, + 0x0557b809 /* 0000aaf7 - 0000ab00 [ 10] */, + 0x05583801 /* 0000ab07 - 0000ab08 [ 2] */, + 0x05587801 /* 0000ab0f - 0000ab10 [ 2] */, + 0x0558b808 /* 0000ab17 - 0000ab1f [ 9] */, + 0x05593800 /* 0000ab27 - 0000ab27 [ 1] */, + 0x05597800 /* 0000ab2f - 0000ab2f [ 1] */, + 0x055b6003 /* 0000ab6c - 0000ab6f [ 4] */, + 0x055f7001 /* 0000abee - 0000abef [ 2] */, + 0x055fd005 /* 0000abfa - 0000abff [ 6] */, + 0x06bd200b /* 0000d7a4 - 0000d7af [ 12] */, + 0x06be3803 /* 0000d7c7 - 0000d7ca [ 4] */, + 0x06bfe7ff /* 0000d7fc - 0000dffb [ 2048] */, + 0x06ffe7ff /* 0000dffc - 0000e7fb [ 2048] */, + 0x073fe7ff /* 0000e7fc - 0000effb [ 2048] */, + 0x077fe7ff /* 0000effc - 0000f7fb [ 2048] */, + 0x07bfe103 /* 0000f7fc - 0000f8ff [ 260] */, + 0x07d37001 /* 0000fa6e - 0000fa6f [ 2] */, + 0x07d6d025 /* 0000fada - 0000faff [ 38] */, + 0x07d8380b /* 0000fb07 - 0000fb12 [ 12] */, + 0x07d8c004 /* 0000fb18 - 0000fb1c [ 5] */, + 0x07d9b800 /* 0000fb37 - 0000fb37 [ 1] */, + 0x07d9e800 /* 0000fb3d - 0000fb3d [ 1] */, + 0x07d9f800 /* 0000fb3f - 0000fb3f [ 1] */, + 0x07da1000 /* 0000fb42 - 0000fb42 [ 1] */, + 0x07da2800 /* 0000fb45 - 0000fb45 [ 1] */, + 0x07de180f /* 0000fbc3 - 0000fbd2 [ 16] */, + 0x07ec8001 /* 0000fd90 - 0000fd91 [ 2] */, + 0x07ee4006 /* 0000fdc8 - 0000fdce [ 7] */, + 0x07ee801f /* 0000fdd0 - 0000fdef [ 32] */, + 0x07f0d005 /* 0000fe1a - 0000fe1f [ 6] */, + 0x07f29800 /* 0000fe53 - 0000fe53 [ 1] */, + 0x07f33800 /* 
0000fe67 - 0000fe67 [ 1] */, + 0x07f36003 /* 0000fe6c - 0000fe6f [ 4] */, + 0x07f3a800 /* 0000fe75 - 0000fe75 [ 1] */, + 0x07f7e803 /* 0000fefd - 0000ff00 [ 4] */, + 0x07fdf802 /* 0000ffbf - 0000ffc1 [ 3] */, + 0x07fe4001 /* 0000ffc8 - 0000ffc9 [ 2] */, + 0x07fe8001 /* 0000ffd0 - 0000ffd1 [ 2] */, + 0x07fec001 /* 0000ffd8 - 0000ffd9 [ 2] */, + 0x07fee802 /* 0000ffdd - 0000ffdf [ 3] */, + 0x07ff3800 /* 0000ffe7 - 0000ffe7 [ 1] */, + 0x07ff780c /* 0000ffef - 0000fffb [ 13] */, + 0x07fff001 /* 0000fffe - 0000ffff [ 2] */, + 0x08006000 /* 0001000c - 0001000c [ 1] */, + 0x08013800 /* 00010027 - 00010027 [ 1] */, + 0x0801d800 /* 0001003b - 0001003b [ 1] */, + 0x0801f000 /* 0001003e - 0001003e [ 1] */, + 0x08027001 /* 0001004e - 0001004f [ 2] */, + 0x0802f021 /* 0001005e - 0001007f [ 34] */, + 0x0807d804 /* 000100fb - 000100ff [ 5] */, + 0x08081803 /* 00010103 - 00010106 [ 4] */, + 0x0809a002 /* 00010134 - 00010136 [ 3] */, + 0x080c7800 /* 0001018f - 0001018f [ 1] */, + 0x080ce802 /* 0001019d - 0001019f [ 3] */, + 0x080d082e /* 000101a1 - 000101cf [ 47] */, + 0x080ff081 /* 000101fe - 0001027f [ 130] */, + 0x0814e802 /* 0001029d - 0001029f [ 3] */, + 0x0816880e /* 000102d1 - 000102df [ 15] */, + 0x0817e003 /* 000102fc - 000102ff [ 4] */, + 0x08192008 /* 00010324 - 0001032c [ 9] */, + 0x081a5804 /* 0001034b - 0001034f [ 5] */, + 0x081bd804 /* 0001037b - 0001037f [ 5] */, + 0x081cf000 /* 0001039e - 0001039e [ 1] */, + 0x081e2003 /* 000103c4 - 000103c7 [ 4] */, + 0x081eb029 /* 000103d6 - 000103ff [ 42] */, + 0x0824f001 /* 0001049e - 0001049f [ 2] */, + 0x08255005 /* 000104aa - 000104af [ 6] */, + 0x0826a003 /* 000104d4 - 000104d7 [ 4] */, + 0x0827e003 /* 000104fc - 000104ff [ 4] */, + 0x08294007 /* 00010528 - 0001052f [ 8] */, + 0x082b200a /* 00010564 - 0001056e [ 11] */, + 0x082bd800 /* 0001057b - 0001057b [ 1] */, + 0x082c5800 /* 0001058b - 0001058b [ 1] */, + 0x082c9800 /* 00010593 - 00010593 [ 1] */, + 0x082cb000 /* 00010596 - 00010596 [ 1] */, + 0x082d1000 /* 000105a2 - 
000105a2 [ 1] */, + 0x082d9000 /* 000105b2 - 000105b2 [ 1] */, + 0x082dd000 /* 000105ba - 000105ba [ 1] */, + 0x082de842 /* 000105bd - 000105ff [ 67] */, + 0x0839b808 /* 00010737 - 0001073f [ 9] */, + 0x083ab009 /* 00010756 - 0001075f [ 10] */, + 0x083b4017 /* 00010768 - 0001077f [ 24] */, + 0x083c3000 /* 00010786 - 00010786 [ 1] */, + 0x083d8800 /* 000107b1 - 000107b1 [ 1] */, + 0x083dd844 /* 000107bb - 000107ff [ 69] */, + 0x08403001 /* 00010806 - 00010807 [ 2] */, + 0x08404800 /* 00010809 - 00010809 [ 1] */, + 0x0841b000 /* 00010836 - 00010836 [ 1] */, + 0x0841c802 /* 00010839 - 0001083b [ 3] */, + 0x0841e801 /* 0001083d - 0001083e [ 2] */, + 0x0842b000 /* 00010856 - 00010856 [ 1] */, + 0x0844f807 /* 0001089f - 000108a6 [ 8] */, + 0x0845802f /* 000108b0 - 000108df [ 48] */, + 0x08479800 /* 000108f3 - 000108f3 [ 1] */, + 0x0847b004 /* 000108f6 - 000108fa [ 5] */, + 0x0848e002 /* 0001091c - 0001091e [ 3] */, + 0x0849d004 /* 0001093a - 0001093e [ 5] */, + 0x084a003f /* 00010940 - 0001097f [ 64] */, + 0x084dc003 /* 000109b8 - 000109bb [ 4] */, + 0x084e8001 /* 000109d0 - 000109d1 [ 2] */, + 0x08502000 /* 00010a04 - 00010a04 [ 1] */, + 0x08503804 /* 00010a07 - 00010a0b [ 5] */, + 0x0850a000 /* 00010a14 - 00010a14 [ 1] */, + 0x0850c000 /* 00010a18 - 00010a18 [ 1] */, + 0x0851b001 /* 00010a36 - 00010a37 [ 2] */, + 0x0851d803 /* 00010a3b - 00010a3e [ 4] */, + 0x08524806 /* 00010a49 - 00010a4f [ 7] */, + 0x0852c806 /* 00010a59 - 00010a5f [ 7] */, + 0x0855001f /* 00010aa0 - 00010abf [ 32] */, + 0x08573803 /* 00010ae7 - 00010aea [ 4] */, + 0x0857b808 /* 00010af7 - 00010aff [ 9] */, + 0x0859b002 /* 00010b36 - 00010b38 [ 3] */, + 0x085ab001 /* 00010b56 - 00010b57 [ 2] */, + 0x085b9804 /* 00010b73 - 00010b77 [ 5] */, + 0x085c9006 /* 00010b92 - 00010b98 [ 7] */, + 0x085ce80b /* 00010b9d - 00010ba8 [ 12] */, + 0x085d804f /* 00010bb0 - 00010bff [ 80] */, + 0x08624836 /* 00010c49 - 00010c7f [ 55] */, + 0x0865980c /* 00010cb3 - 00010cbf [ 13] */, + 0x08679806 /* 00010cf3 - 00010cf9 
[ 7] */, + 0x08694007 /* 00010d28 - 00010d2f [ 8] */, + 0x0869d125 /* 00010d3a - 00010e5f [ 294] */, + 0x0873f800 /* 00010e7f - 00010e7f [ 1] */, + 0x08755000 /* 00010eaa - 00010eaa [ 1] */, + 0x08757001 /* 00010eae - 00010eaf [ 2] */, + 0x0875904a /* 00010eb2 - 00010efc [ 75] */, + 0x08794007 /* 00010f28 - 00010f2f [ 8] */, + 0x087ad015 /* 00010f5a - 00010f6f [ 22] */, + 0x087c5025 /* 00010f8a - 00010faf [ 38] */, + 0x087e6013 /* 00010fcc - 00010fdf [ 20] */, + 0x087fb808 /* 00010ff7 - 00010fff [ 9] */, + 0x08827003 /* 0001104e - 00011051 [ 4] */, + 0x0883b008 /* 00011076 - 0001107e [ 9] */, + 0x0885e800 /* 000110bd - 000110bd [ 1] */, + 0x0886180c /* 000110c3 - 000110cf [ 13] */, + 0x08874806 /* 000110e9 - 000110ef [ 7] */, + 0x0887d005 /* 000110fa - 000110ff [ 6] */, + 0x0889a800 /* 00011135 - 00011135 [ 1] */, + 0x088a4007 /* 00011148 - 0001114f [ 8] */, + 0x088bb808 /* 00011177 - 0001117f [ 9] */, + 0x088f0000 /* 000111e0 - 000111e0 [ 1] */, + 0x088fa80a /* 000111f5 - 000111ff [ 11] */, + 0x08909000 /* 00011212 - 00011212 [ 1] */, + 0x0892103d /* 00011242 - 0001127f [ 62] */, + 0x08943800 /* 00011287 - 00011287 [ 1] */, + 0x08944800 /* 00011289 - 00011289 [ 1] */, + 0x08947000 /* 0001128e - 0001128e [ 1] */, + 0x0894f000 /* 0001129e - 0001129e [ 1] */, + 0x08955005 /* 000112aa - 000112af [ 6] */, + 0x08975804 /* 000112eb - 000112ef [ 5] */, + 0x0897d005 /* 000112fa - 000112ff [ 6] */, + 0x08982000 /* 00011304 - 00011304 [ 1] */, + 0x08986801 /* 0001130d - 0001130e [ 2] */, + 0x08988801 /* 00011311 - 00011312 [ 2] */, + 0x08994800 /* 00011329 - 00011329 [ 1] */, + 0x08998800 /* 00011331 - 00011331 [ 1] */, + 0x0899a000 /* 00011334 - 00011334 [ 1] */, + 0x0899d000 /* 0001133a - 0001133a [ 1] */, + 0x089a2801 /* 00011345 - 00011346 [ 2] */, + 0x089a4801 /* 00011349 - 0001134a [ 2] */, + 0x089a7001 /* 0001134e - 0001134f [ 2] */, + 0x089a8805 /* 00011351 - 00011356 [ 6] */, + 0x089ac004 /* 00011358 - 0001135c [ 5] */, + 0x089b2001 /* 00011364 - 00011365 [ 2] */, + 
0x089b6802 /* 0001136d - 0001136f [ 3] */, + 0x089ba88a /* 00011375 - 000113ff [ 139] */, + 0x08a2e000 /* 0001145c - 0001145c [ 1] */, + 0x08a3101d /* 00011462 - 0001147f [ 30] */, + 0x08a64007 /* 000114c8 - 000114cf [ 8] */, + 0x08a6d0a5 /* 000114da - 0001157f [ 166] */, + 0x08adb001 /* 000115b6 - 000115b7 [ 2] */, + 0x08aef021 /* 000115de - 000115ff [ 34] */, + 0x08b2280a /* 00011645 - 0001164f [ 11] */, + 0x08b2d005 /* 0001165a - 0001165f [ 6] */, + 0x08b36812 /* 0001166d - 0001167f [ 19] */, + 0x08b5d005 /* 000116ba - 000116bf [ 6] */, + 0x08b65035 /* 000116ca - 000116ff [ 54] */, + 0x08b8d801 /* 0001171b - 0001171c [ 2] */, + 0x08b96003 /* 0001172c - 0001172f [ 4] */, + 0x08ba38b8 /* 00011747 - 000117ff [ 185] */, + 0x08c1e063 /* 0001183c - 0001189f [ 100] */, + 0x08c7980b /* 000118f3 - 000118fe [ 12] */, + 0x08c83801 /* 00011907 - 00011908 [ 2] */, + 0x08c85001 /* 0001190a - 0001190b [ 2] */, + 0x08c8a000 /* 00011914 - 00011914 [ 1] */, + 0x08c8b800 /* 00011917 - 00011917 [ 1] */, + 0x08c9b000 /* 00011936 - 00011936 [ 1] */, + 0x08c9c801 /* 00011939 - 0001193a [ 2] */, + 0x08ca3808 /* 00011947 - 0001194f [ 9] */, + 0x08cad045 /* 0001195a - 0001199f [ 70] */, + 0x08cd4001 /* 000119a8 - 000119a9 [ 2] */, + 0x08cec001 /* 000119d8 - 000119d9 [ 2] */, + 0x08cf281a /* 000119e5 - 000119ff [ 27] */, + 0x08d24007 /* 00011a48 - 00011a4f [ 8] */, + 0x08d5180c /* 00011aa3 - 00011aaf [ 13] */, + 0x08d7c806 /* 00011af9 - 00011aff [ 7] */, + 0x08d850f5 /* 00011b0a - 00011bff [ 246] */, + 0x08e04800 /* 00011c09 - 00011c09 [ 1] */, + 0x08e1b800 /* 00011c37 - 00011c37 [ 1] */, + 0x08e23009 /* 00011c46 - 00011c4f [ 10] */, + 0x08e36802 /* 00011c6d - 00011c6f [ 3] */, + 0x08e48001 /* 00011c90 - 00011c91 [ 2] */, + 0x08e54000 /* 00011ca8 - 00011ca8 [ 1] */, + 0x08e5b848 /* 00011cb7 - 00011cff [ 73] */, + 0x08e83800 /* 00011d07 - 00011d07 [ 1] */, + 0x08e85000 /* 00011d0a - 00011d0a [ 1] */, + 0x08e9b802 /* 00011d37 - 00011d39 [ 3] */, + 0x08e9d800 /* 00011d3b - 00011d3b [ 1] */, 
+ 0x08e9f000 /* 00011d3e - 00011d3e [ 1] */, + 0x08ea4007 /* 00011d48 - 00011d4f [ 8] */, + 0x08ead005 /* 00011d5a - 00011d5f [ 6] */, + 0x08eb3000 /* 00011d66 - 00011d66 [ 1] */, + 0x08eb4800 /* 00011d69 - 00011d69 [ 1] */, + 0x08ec7800 /* 00011d8f - 00011d8f [ 1] */, + 0x08ec9000 /* 00011d92 - 00011d92 [ 1] */, + 0x08ecc806 /* 00011d99 - 00011d9f [ 7] */, + 0x08ed5135 /* 00011daa - 00011edf [ 310] */, + 0x08f7c806 /* 00011ef9 - 00011eff [ 7] */, + 0x08f88800 /* 00011f11 - 00011f11 [ 1] */, + 0x08f9d802 /* 00011f3b - 00011f3d [ 3] */, + 0x08fad055 /* 00011f5a - 00011faf [ 86] */, + 0x08fd880e /* 00011fb1 - 00011fbf [ 15] */, + 0x08ff900c /* 00011ff2 - 00011ffe [ 13] */, + 0x091cd065 /* 0001239a - 000123ff [ 102] */, + 0x09237800 /* 0001246f - 0001246f [ 1] */, + 0x0923a80a /* 00012475 - 0001247f [ 11] */, + 0x092a27ff /* 00012544 - 00012d43 [ 2048] */, + 0x096a224b /* 00012d44 - 00012f8f [ 588] */, + 0x097f980c /* 00012ff3 - 00012fff [ 13] */, + 0x09a1800f /* 00013430 - 0001343f [ 16] */, + 0x09a2b7ff /* 00013456 - 00013c55 [ 2048] */, + 0x09e2b7a9 /* 00013c56 - 000143ff [ 1962] */, + 0x0a323fff /* 00014647 - 00014e46 [ 2048] */, + 0x0a723fff /* 00014e47 - 00015646 [ 2048] */, + 0x0ab23fff /* 00015647 - 00015e46 [ 2048] */, + 0x0af23fff /* 00015e47 - 00016646 [ 2048] */, + 0x0b3239b8 /* 00016647 - 000167ff [ 441] */, + 0x0b51c806 /* 00016a39 - 00016a3f [ 7] */, + 0x0b52f800 /* 00016a5f - 00016a5f [ 1] */, + 0x0b535003 /* 00016a6a - 00016a6d [ 4] */, + 0x0b55f800 /* 00016abf - 00016abf [ 1] */, + 0x0b565005 /* 00016aca - 00016acf [ 6] */, + 0x0b577001 /* 00016aee - 00016aef [ 2] */, + 0x0b57b009 /* 00016af6 - 00016aff [ 10] */, + 0x0b5a3009 /* 00016b46 - 00016b4f [ 10] */, + 0x0b5ad000 /* 00016b5a - 00016b5a [ 1] */, + 0x0b5b1000 /* 00016b62 - 00016b62 [ 1] */, + 0x0b5bc004 /* 00016b78 - 00016b7c [ 5] */, + 0x0b5c82af /* 00016b90 - 00016e3f [ 688] */, + 0x0b74d864 /* 00016e9b - 00016eff [ 101] */, + 0x0b7a5803 /* 00016f4b - 00016f4e [ 4] */, + 0x0b7c4006 /* 
00016f88 - 00016f8e [ 7] */, + 0x0b7d003f /* 00016fa0 - 00016fdf [ 64] */, + 0x0b7f280a /* 00016fe5 - 00016fef [ 11] */, + 0x0b7f900d /* 00016ff2 - 00016fff [ 14] */, + 0x0c3fc007 /* 000187f8 - 000187ff [ 8] */, + 0x0c66b029 /* 00018cd6 - 00018cff [ 42] */, + 0x0c684fff /* 00018d09 - 00019508 [ 2048] */, + 0x0ca84fff /* 00019509 - 00019d08 [ 2048] */, + 0x0ce84fff /* 00019d09 - 0001a508 [ 2048] */, + 0x0d284fff /* 0001a509 - 0001ad08 [ 2048] */, + 0x0d684ae6 /* 0001ad09 - 0001afef [ 743] */, + 0x0d7fa000 /* 0001aff4 - 0001aff4 [ 1] */, + 0x0d7fe000 /* 0001affc - 0001affc [ 1] */, + 0x0d7ff800 /* 0001afff - 0001afff [ 1] */, + 0x0d89180e /* 0001b123 - 0001b131 [ 15] */, + 0x0d89981c /* 0001b133 - 0001b14f [ 29] */, + 0x0d8a9801 /* 0001b153 - 0001b154 [ 2] */, + 0x0d8ab00d /* 0001b156 - 0001b163 [ 14] */, + 0x0d8b4007 /* 0001b168 - 0001b16f [ 8] */, + 0x0d97e7ff /* 0001b2fc - 0001bafb [ 2048] */, + 0x0dd7e103 /* 0001bafc - 0001bbff [ 260] */, + 0x0de35804 /* 0001bc6b - 0001bc6f [ 5] */, + 0x0de3e802 /* 0001bc7d - 0001bc7f [ 3] */, + 0x0de44806 /* 0001bc89 - 0001bc8f [ 7] */, + 0x0de4d001 /* 0001bc9a - 0001bc9b [ 2] */, + 0x0de507ff /* 0001bca0 - 0001c49f [ 2048] */, + 0x0e2507ff /* 0001c4a0 - 0001cc9f [ 2048] */, + 0x0e65025f /* 0001cca0 - 0001ceff [ 608] */, + 0x0e797001 /* 0001cf2e - 0001cf2f [ 2] */, + 0x0e7a3808 /* 0001cf47 - 0001cf4f [ 9] */, + 0x0e7e203b /* 0001cfc4 - 0001cfff [ 60] */, + 0x0e87b009 /* 0001d0f6 - 0001d0ff [ 10] */, + 0x0e893801 /* 0001d127 - 0001d128 [ 2] */, + 0x0e8b9807 /* 0001d173 - 0001d17a [ 8] */, + 0x0e8f5814 /* 0001d1eb - 0001d1ff [ 21] */, + 0x0e923079 /* 0001d246 - 0001d2bf [ 122] */, + 0x0e96a00b /* 0001d2d4 - 0001d2df [ 12] */, + 0x0e97a00b /* 0001d2f4 - 0001d2ff [ 12] */, + 0x0e9ab808 /* 0001d357 - 0001d35f [ 9] */, + 0x0e9bc886 /* 0001d379 - 0001d3ff [ 135] */, + 0x0ea2a800 /* 0001d455 - 0001d455 [ 1] */, + 0x0ea4e800 /* 0001d49d - 0001d49d [ 1] */, + 0x0ea50001 /* 0001d4a0 - 0001d4a1 [ 2] */, + 0x0ea51801 /* 0001d4a3 - 0001d4a4 [ 
2] */, + 0x0ea53801 /* 0001d4a7 - 0001d4a8 [ 2] */, + 0x0ea56800 /* 0001d4ad - 0001d4ad [ 1] */, + 0x0ea5d000 /* 0001d4ba - 0001d4ba [ 1] */, + 0x0ea5e000 /* 0001d4bc - 0001d4bc [ 1] */, + 0x0ea62000 /* 0001d4c4 - 0001d4c4 [ 1] */, + 0x0ea83000 /* 0001d506 - 0001d506 [ 1] */, + 0x0ea85801 /* 0001d50b - 0001d50c [ 2] */, + 0x0ea8a800 /* 0001d515 - 0001d515 [ 1] */, + 0x0ea8e800 /* 0001d51d - 0001d51d [ 1] */, + 0x0ea9d000 /* 0001d53a - 0001d53a [ 1] */, + 0x0ea9f800 /* 0001d53f - 0001d53f [ 1] */, + 0x0eaa2800 /* 0001d545 - 0001d545 [ 1] */, + 0x0eaa3802 /* 0001d547 - 0001d549 [ 3] */, + 0x0eaa8800 /* 0001d551 - 0001d551 [ 1] */, + 0x0eb53001 /* 0001d6a6 - 0001d6a7 [ 2] */, + 0x0ebe6001 /* 0001d7cc - 0001d7cd [ 2] */, + 0x0ed4600e /* 0001da8c - 0001da9a [ 15] */, + 0x0ed50000 /* 0001daa0 - 0001daa0 [ 1] */, + 0x0ed5844f /* 0001dab0 - 0001deff [ 1104] */, + 0x0ef8f805 /* 0001df1f - 0001df24 [ 6] */, + 0x0ef958d4 /* 0001df2b - 0001dfff [ 213] */, + 0x0f003800 /* 0001e007 - 0001e007 [ 1] */, + 0x0f00c801 /* 0001e019 - 0001e01a [ 2] */, + 0x0f011000 /* 0001e022 - 0001e022 [ 1] */, + 0x0f012800 /* 0001e025 - 0001e025 [ 1] */, + 0x0f015804 /* 0001e02b - 0001e02f [ 5] */, + 0x0f037020 /* 0001e06e - 0001e08e [ 33] */, + 0x0f04806f /* 0001e090 - 0001e0ff [ 112] */, + 0x0f096802 /* 0001e12d - 0001e12f [ 3] */, + 0x0f09f001 /* 0001e13e - 0001e13f [ 2] */, + 0x0f0a5003 /* 0001e14a - 0001e14d [ 4] */, + 0x0f0a813f /* 0001e150 - 0001e28f [ 320] */, + 0x0f157810 /* 0001e2af - 0001e2bf [ 17] */, + 0x0f17d004 /* 0001e2fa - 0001e2fe [ 5] */, + 0x0f1801cf /* 0001e300 - 0001e4cf [ 464] */, + 0x0f27d2e5 /* 0001e4fa - 0001e7df [ 742] */, + 0x0f3f3800 /* 0001e7e7 - 0001e7e7 [ 1] */, + 0x0f3f6000 /* 0001e7ec - 0001e7ec [ 1] */, + 0x0f3f7800 /* 0001e7ef - 0001e7ef [ 1] */, + 0x0f3ff800 /* 0001e7ff - 0001e7ff [ 1] */, + 0x0f462801 /* 0001e8c5 - 0001e8c6 [ 2] */, + 0x0f46b828 /* 0001e8d7 - 0001e8ff [ 41] */, + 0x0f4a6003 /* 0001e94c - 0001e94f [ 4] */, + 0x0f4ad003 /* 0001e95a - 0001e95d [ 4] 
*/, + 0x0f4b0310 /* 0001e960 - 0001ec70 [ 785] */, + 0x0f65a84b /* 0001ecb5 - 0001ed00 [ 76] */, + 0x0f69f0c1 /* 0001ed3e - 0001edff [ 194] */, + 0x0f702000 /* 0001ee04 - 0001ee04 [ 1] */, + 0x0f710000 /* 0001ee20 - 0001ee20 [ 1] */, + 0x0f711800 /* 0001ee23 - 0001ee23 [ 1] */, + 0x0f712801 /* 0001ee25 - 0001ee26 [ 2] */, + 0x0f714000 /* 0001ee28 - 0001ee28 [ 1] */, + 0x0f719800 /* 0001ee33 - 0001ee33 [ 1] */, + 0x0f71c000 /* 0001ee38 - 0001ee38 [ 1] */, + 0x0f71d000 /* 0001ee3a - 0001ee3a [ 1] */, + 0x0f71e005 /* 0001ee3c - 0001ee41 [ 6] */, + 0x0f721803 /* 0001ee43 - 0001ee46 [ 4] */, + 0x0f724000 /* 0001ee48 - 0001ee48 [ 1] */, + 0x0f725000 /* 0001ee4a - 0001ee4a [ 1] */, + 0x0f726000 /* 0001ee4c - 0001ee4c [ 1] */, + 0x0f728000 /* 0001ee50 - 0001ee50 [ 1] */, + 0x0f729800 /* 0001ee53 - 0001ee53 [ 1] */, + 0x0f72a801 /* 0001ee55 - 0001ee56 [ 2] */, + 0x0f72c000 /* 0001ee58 - 0001ee58 [ 1] */, + 0x0f72d000 /* 0001ee5a - 0001ee5a [ 1] */, + 0x0f72e000 /* 0001ee5c - 0001ee5c [ 1] */, + 0x0f72f000 /* 0001ee5e - 0001ee5e [ 1] */, + 0x0f730000 /* 0001ee60 - 0001ee60 [ 1] */, + 0x0f731800 /* 0001ee63 - 0001ee63 [ 1] */, + 0x0f732801 /* 0001ee65 - 0001ee66 [ 2] */, + 0x0f735800 /* 0001ee6b - 0001ee6b [ 1] */, + 0x0f739800 /* 0001ee73 - 0001ee73 [ 1] */, + 0x0f73c000 /* 0001ee78 - 0001ee78 [ 1] */, + 0x0f73e800 /* 0001ee7d - 0001ee7d [ 1] */, + 0x0f73f800 /* 0001ee7f - 0001ee7f [ 1] */, + 0x0f745000 /* 0001ee8a - 0001ee8a [ 1] */, + 0x0f74e004 /* 0001ee9c - 0001eea0 [ 5] */, + 0x0f752000 /* 0001eea4 - 0001eea4 [ 1] */, + 0x0f755000 /* 0001eeaa - 0001eeaa [ 1] */, + 0x0f75e033 /* 0001eebc - 0001eeef [ 52] */, + 0x0f77910d /* 0001eef2 - 0001efff [ 270] */, + 0x0f816003 /* 0001f02c - 0001f02f [ 4] */, + 0x0f84a00b /* 0001f094 - 0001f09f [ 12] */, + 0x0f857801 /* 0001f0af - 0001f0b0 [ 2] */, + 0x0f860000 /* 0001f0c0 - 0001f0c0 [ 1] */, + 0x0f868000 /* 0001f0d0 - 0001f0d0 [ 1] */, + 0x0f87b009 /* 0001f0f6 - 0001f0ff [ 10] */, + 0x0f8d7037 /* 0001f1ae - 0001f1e5 [ 56] */, + 
0x0f90180c /* 0001f203 - 0001f20f [ 13] */, + 0x0f91e003 /* 0001f23c - 0001f23f [ 4] */, + 0x0f924806 /* 0001f249 - 0001f24f [ 7] */, + 0x0f92900d /* 0001f252 - 0001f25f [ 14] */, + 0x0f933099 /* 0001f266 - 0001f2ff [ 154] */, + 0x0fb6c003 /* 0001f6d8 - 0001f6db [ 4] */, + 0x0fb76802 /* 0001f6ed - 0001f6ef [ 3] */, + 0x0fb7e802 /* 0001f6fd - 0001f6ff [ 3] */, + 0x0fbbb803 /* 0001f777 - 0001f77a [ 4] */, + 0x0fbed005 /* 0001f7da - 0001f7df [ 6] */, + 0x0fbf6003 /* 0001f7ec - 0001f7ef [ 4] */, + 0x0fbf880e /* 0001f7f1 - 0001f7ff [ 15] */, + 0x0fc06003 /* 0001f80c - 0001f80f [ 4] */, + 0x0fc24007 /* 0001f848 - 0001f84f [ 8] */, + 0x0fc2d005 /* 0001f85a - 0001f85f [ 6] */, + 0x0fc44007 /* 0001f888 - 0001f88f [ 8] */, + 0x0fc57001 /* 0001f8ae - 0001f8af [ 2] */, + 0x0fc5904d /* 0001f8b2 - 0001f8ff [ 78] */, + 0x0fd2a00b /* 0001fa54 - 0001fa5f [ 12] */, + 0x0fd37001 /* 0001fa6e - 0001fa6f [ 2] */, + 0x0fd3e802 /* 0001fa7d - 0001fa7f [ 3] */, + 0x0fd44806 /* 0001fa89 - 0001fa8f [ 7] */, + 0x0fd5f000 /* 0001fabe - 0001fabe [ 1] */, + 0x0fd63007 /* 0001fac6 - 0001facd [ 8] */, + 0x0fd6e003 /* 0001fadc - 0001fadf [ 4] */, + 0x0fd74806 /* 0001fae9 - 0001faef [ 7] */, + 0x0fd7c806 /* 0001faf9 - 0001faff [ 7] */, + 0x0fdc9800 /* 0001fb93 - 0001fb93 [ 1] */, + 0x0fde5824 /* 0001fbcb - 0001fbef [ 37] */, + 0x0fdfd405 /* 0001fbfa - 0001ffff [ 1030] */, + 0x1537001f /* 0002a6e0 - 0002a6ff [ 32] */, + 0x15b9d005 /* 0002b73a - 0002b73f [ 6] */, + 0x15c0f001 /* 0002b81e - 0002b81f [ 2] */, + 0x1675100d /* 0002cea2 - 0002ceaf [ 14] */, + 0x175f080e /* 0002ebe1 - 0002ebef [ 15] */, + 0x1772f7ff /* 0002ee5e - 0002f65d [ 2048] */, + 0x17b2f1a1 /* 0002f65e - 0002f7ff [ 418] */, + 0x17d0f5e1 /* 0002fa1e - 0002ffff [ 1506] */, + 0x189a5804 /* 0003134b - 0003134f [ 5] */, + 0x191d87ff /* 000323b0 - 00032baf [ 2048] */, + 0x195d87ff /* 00032bb0 - 000333af [ 2048] */, + 0x199d87ff /* 000333b0 - 00033baf [ 2048] */, + 0x19dd87ff /* 00033bb0 - 000343af [ 2048] */, + 0x1a1d87ff /* 000343b0 - 
00034baf [ 2048] */, + 0x1a5d87ff /* 00034bb0 - 000353af [ 2048] */, + 0x1a9d87ff /* 000353b0 - 00035baf [ 2048] */, + 0x1add87ff /* 00035bb0 - 000363af [ 2048] */, + 0x1b1d87ff /* 000363b0 - 00036baf [ 2048] */, + 0x1b5d87ff /* 00036bb0 - 000373af [ 2048] */, + 0x1b9d87ff /* 000373b0 - 00037baf [ 2048] */, + 0x1bdd87ff /* 00037bb0 - 000383af [ 2048] */, + 0x1c1d87ff /* 000383b0 - 00038baf [ 2048] */, + 0x1c5d87ff /* 00038bb0 - 000393af [ 2048] */, + 0x1c9d87ff /* 000393b0 - 00039baf [ 2048] */, + 0x1cdd87ff /* 00039bb0 - 0003a3af [ 2048] */, + 0x1d1d87ff /* 0003a3b0 - 0003abaf [ 2048] */, + 0x1d5d87ff /* 0003abb0 - 0003b3af [ 2048] */, + 0x1d9d87ff /* 0003b3b0 - 0003bbaf [ 2048] */, + 0x1ddd87ff /* 0003bbb0 - 0003c3af [ 2048] */, + 0x1e1d87ff /* 0003c3b0 - 0003cbaf [ 2048] */, + 0x1e5d87ff /* 0003cbb0 - 0003d3af [ 2048] */, + 0x1e9d87ff /* 0003d3b0 - 0003dbaf [ 2048] */, + 0x1edd87ff /* 0003dbb0 - 0003e3af [ 2048] */, + 0x1f1d87ff /* 0003e3b0 - 0003ebaf [ 2048] */, + 0x1f5d87ff /* 0003ebb0 - 0003f3af [ 2048] */, + 0x1f9d87ff /* 0003f3b0 - 0003fbaf [ 2048] */, + 0x1fdd87ff /* 0003fbb0 - 000403af [ 2048] */, + 0x201d87ff /* 000403b0 - 00040baf [ 2048] */, + 0x205d87ff /* 00040bb0 - 000413af [ 2048] */, + 0x209d87ff /* 000413b0 - 00041baf [ 2048] */, + 0x20dd87ff /* 00041bb0 - 000423af [ 2048] */, + 0x211d87ff /* 000423b0 - 00042baf [ 2048] */, + 0x215d87ff /* 00042bb0 - 000433af [ 2048] */, + 0x219d87ff /* 000433b0 - 00043baf [ 2048] */, + 0x21dd87ff /* 00043bb0 - 000443af [ 2048] */, + 0x221d87ff /* 000443b0 - 00044baf [ 2048] */, + 0x225d87ff /* 00044bb0 - 000453af [ 2048] */, + 0x229d87ff /* 000453b0 - 00045baf [ 2048] */, + 0x22dd87ff /* 00045bb0 - 000463af [ 2048] */, + 0x231d87ff /* 000463b0 - 00046baf [ 2048] */, + 0x235d87ff /* 00046bb0 - 000473af [ 2048] */, + 0x239d87ff /* 000473b0 - 00047baf [ 2048] */, + 0x23dd87ff /* 00047bb0 - 000483af [ 2048] */, + 0x241d87ff /* 000483b0 - 00048baf [ 2048] */, + 0x245d87ff /* 00048bb0 - 000493af [ 2048] */, + 
0x249d87ff /* 000493b0 - 00049baf [ 2048] */, + 0x24dd87ff /* 00049bb0 - 0004a3af [ 2048] */, + 0x251d87ff /* 0004a3b0 - 0004abaf [ 2048] */, + 0x255d87ff /* 0004abb0 - 0004b3af [ 2048] */, + 0x259d87ff /* 0004b3b0 - 0004bbaf [ 2048] */, + 0x25dd87ff /* 0004bbb0 - 0004c3af [ 2048] */, + 0x261d87ff /* 0004c3b0 - 0004cbaf [ 2048] */, + 0x265d87ff /* 0004cbb0 - 0004d3af [ 2048] */, + 0x269d87ff /* 0004d3b0 - 0004dbaf [ 2048] */, + 0x26dd87ff /* 0004dbb0 - 0004e3af [ 2048] */, + 0x271d87ff /* 0004e3b0 - 0004ebaf [ 2048] */, + 0x275d87ff /* 0004ebb0 - 0004f3af [ 2048] */, + 0x279d87ff /* 0004f3b0 - 0004fbaf [ 2048] */, + 0x27dd87ff /* 0004fbb0 - 000503af [ 2048] */, + 0x281d87ff /* 000503b0 - 00050baf [ 2048] */, + 0x285d87ff /* 00050bb0 - 000513af [ 2048] */, + 0x289d87ff /* 000513b0 - 00051baf [ 2048] */, + 0x28dd87ff /* 00051bb0 - 000523af [ 2048] */, + 0x291d87ff /* 000523b0 - 00052baf [ 2048] */, + 0x295d87ff /* 00052bb0 - 000533af [ 2048] */, + 0x299d87ff /* 000533b0 - 00053baf [ 2048] */, + 0x29dd87ff /* 00053bb0 - 000543af [ 2048] */, + 0x2a1d87ff /* 000543b0 - 00054baf [ 2048] */, + 0x2a5d87ff /* 00054bb0 - 000553af [ 2048] */, + 0x2a9d87ff /* 000553b0 - 00055baf [ 2048] */, + 0x2add87ff /* 00055bb0 - 000563af [ 2048] */, + 0x2b1d87ff /* 000563b0 - 00056baf [ 2048] */, + 0x2b5d87ff /* 00056bb0 - 000573af [ 2048] */, + 0x2b9d87ff /* 000573b0 - 00057baf [ 2048] */, + 0x2bdd87ff /* 00057bb0 - 000583af [ 2048] */, + 0x2c1d87ff /* 000583b0 - 00058baf [ 2048] */, + 0x2c5d87ff /* 00058bb0 - 000593af [ 2048] */, + 0x2c9d87ff /* 000593b0 - 00059baf [ 2048] */, + 0x2cdd87ff /* 00059bb0 - 0005a3af [ 2048] */, + 0x2d1d87ff /* 0005a3b0 - 0005abaf [ 2048] */, + 0x2d5d87ff /* 0005abb0 - 0005b3af [ 2048] */, + 0x2d9d87ff /* 0005b3b0 - 0005bbaf [ 2048] */, + 0x2ddd87ff /* 0005bbb0 - 0005c3af [ 2048] */, + 0x2e1d87ff /* 0005c3b0 - 0005cbaf [ 2048] */, + 0x2e5d87ff /* 0005cbb0 - 0005d3af [ 2048] */, + 0x2e9d87ff /* 0005d3b0 - 0005dbaf [ 2048] */, + 0x2edd87ff /* 0005dbb0 - 
0005e3af [ 2048] */, + 0x2f1d87ff /* 0005e3b0 - 0005ebaf [ 2048] */, + 0x2f5d87ff /* 0005ebb0 - 0005f3af [ 2048] */, + 0x2f9d87ff /* 0005f3b0 - 0005fbaf [ 2048] */, + 0x2fdd87ff /* 0005fbb0 - 000603af [ 2048] */, + 0x301d87ff /* 000603b0 - 00060baf [ 2048] */, + 0x305d87ff /* 00060bb0 - 000613af [ 2048] */, + 0x309d87ff /* 000613b0 - 00061baf [ 2048] */, + 0x30dd87ff /* 00061bb0 - 000623af [ 2048] */, + 0x311d87ff /* 000623b0 - 00062baf [ 2048] */, + 0x315d87ff /* 00062bb0 - 000633af [ 2048] */, + 0x319d87ff /* 000633b0 - 00063baf [ 2048] */, + 0x31dd87ff /* 00063bb0 - 000643af [ 2048] */, + 0x321d87ff /* 000643b0 - 00064baf [ 2048] */, + 0x325d87ff /* 00064bb0 - 000653af [ 2048] */, + 0x329d87ff /* 000653b0 - 00065baf [ 2048] */, + 0x32dd87ff /* 00065bb0 - 000663af [ 2048] */, + 0x331d87ff /* 000663b0 - 00066baf [ 2048] */, + 0x335d87ff /* 00066bb0 - 000673af [ 2048] */, + 0x339d87ff /* 000673b0 - 00067baf [ 2048] */, + 0x33dd87ff /* 00067bb0 - 000683af [ 2048] */, + 0x341d87ff /* 000683b0 - 00068baf [ 2048] */, + 0x345d87ff /* 00068bb0 - 000693af [ 2048] */, + 0x349d87ff /* 000693b0 - 00069baf [ 2048] */, + 0x34dd87ff /* 00069bb0 - 0006a3af [ 2048] */, + 0x351d87ff /* 0006a3b0 - 0006abaf [ 2048] */, + 0x355d87ff /* 0006abb0 - 0006b3af [ 2048] */, + 0x359d87ff /* 0006b3b0 - 0006bbaf [ 2048] */, + 0x35dd87ff /* 0006bbb0 - 0006c3af [ 2048] */, + 0x361d87ff /* 0006c3b0 - 0006cbaf [ 2048] */, + 0x365d87ff /* 0006cbb0 - 0006d3af [ 2048] */, + 0x369d87ff /* 0006d3b0 - 0006dbaf [ 2048] */, + 0x36dd87ff /* 0006dbb0 - 0006e3af [ 2048] */, + 0x371d87ff /* 0006e3b0 - 0006ebaf [ 2048] */, + 0x375d87ff /* 0006ebb0 - 0006f3af [ 2048] */, + 0x379d87ff /* 0006f3b0 - 0006fbaf [ 2048] */, + 0x37dd87ff /* 0006fbb0 - 000703af [ 2048] */, + 0x381d87ff /* 000703b0 - 00070baf [ 2048] */, + 0x385d87ff /* 00070bb0 - 000713af [ 2048] */, + 0x389d87ff /* 000713b0 - 00071baf [ 2048] */, + 0x38dd87ff /* 00071bb0 - 000723af [ 2048] */, + 0x391d87ff /* 000723b0 - 00072baf [ 2048] */, + 
0x395d87ff /* 00072bb0 - 000733af [ 2048] */, + 0x399d87ff /* 000733b0 - 00073baf [ 2048] */, + 0x39dd87ff /* 00073bb0 - 000743af [ 2048] */, + 0x3a1d87ff /* 000743b0 - 00074baf [ 2048] */, + 0x3a5d87ff /* 00074bb0 - 000753af [ 2048] */, + 0x3a9d87ff /* 000753b0 - 00075baf [ 2048] */, + 0x3add87ff /* 00075bb0 - 000763af [ 2048] */, + 0x3b1d87ff /* 000763b0 - 00076baf [ 2048] */, + 0x3b5d87ff /* 00076bb0 - 000773af [ 2048] */, + 0x3b9d87ff /* 000773b0 - 00077baf [ 2048] */, + 0x3bdd87ff /* 00077bb0 - 000783af [ 2048] */, + 0x3c1d87ff /* 000783b0 - 00078baf [ 2048] */, + 0x3c5d87ff /* 00078bb0 - 000793af [ 2048] */, + 0x3c9d87ff /* 000793b0 - 00079baf [ 2048] */, + 0x3cdd87ff /* 00079bb0 - 0007a3af [ 2048] */, + 0x3d1d87ff /* 0007a3b0 - 0007abaf [ 2048] */, + 0x3d5d87ff /* 0007abb0 - 0007b3af [ 2048] */, + 0x3d9d87ff /* 0007b3b0 - 0007bbaf [ 2048] */, + 0x3ddd87ff /* 0007bbb0 - 0007c3af [ 2048] */, + 0x3e1d87ff /* 0007c3b0 - 0007cbaf [ 2048] */, + 0x3e5d87ff /* 0007cbb0 - 0007d3af [ 2048] */, + 0x3e9d87ff /* 0007d3b0 - 0007dbaf [ 2048] */, + 0x3edd87ff /* 0007dbb0 - 0007e3af [ 2048] */, + 0x3f1d87ff /* 0007e3b0 - 0007ebaf [ 2048] */, + 0x3f5d87ff /* 0007ebb0 - 0007f3af [ 2048] */, + 0x3f9d87ff /* 0007f3b0 - 0007fbaf [ 2048] */, + 0x3fdd87ff /* 0007fbb0 - 000803af [ 2048] */, + 0x401d87ff /* 000803b0 - 00080baf [ 2048] */, + 0x405d87ff /* 00080bb0 - 000813af [ 2048] */, + 0x409d87ff /* 000813b0 - 00081baf [ 2048] */, + 0x40dd87ff /* 00081bb0 - 000823af [ 2048] */, + 0x411d87ff /* 000823b0 - 00082baf [ 2048] */, + 0x415d87ff /* 00082bb0 - 000833af [ 2048] */, + 0x419d87ff /* 000833b0 - 00083baf [ 2048] */, + 0x41dd87ff /* 00083bb0 - 000843af [ 2048] */, + 0x421d87ff /* 000843b0 - 00084baf [ 2048] */, + 0x425d87ff /* 00084bb0 - 000853af [ 2048] */, + 0x429d87ff /* 000853b0 - 00085baf [ 2048] */, + 0x42dd87ff /* 00085bb0 - 000863af [ 2048] */, + 0x431d87ff /* 000863b0 - 00086baf [ 2048] */, + 0x435d87ff /* 00086bb0 - 000873af [ 2048] */, + 0x439d87ff /* 000873b0 - 
00087baf [ 2048] */, + 0x43dd87ff /* 00087bb0 - 000883af [ 2048] */, + 0x441d87ff /* 000883b0 - 00088baf [ 2048] */, + 0x445d87ff /* 00088bb0 - 000893af [ 2048] */, + 0x449d87ff /* 000893b0 - 00089baf [ 2048] */, + 0x44dd87ff /* 00089bb0 - 0008a3af [ 2048] */, + 0x451d87ff /* 0008a3b0 - 0008abaf [ 2048] */, + 0x455d87ff /* 0008abb0 - 0008b3af [ 2048] */, + 0x459d87ff /* 0008b3b0 - 0008bbaf [ 2048] */, + 0x45dd87ff /* 0008bbb0 - 0008c3af [ 2048] */, + 0x461d87ff /* 0008c3b0 - 0008cbaf [ 2048] */, + 0x465d87ff /* 0008cbb0 - 0008d3af [ 2048] */, + 0x469d87ff /* 0008d3b0 - 0008dbaf [ 2048] */, + 0x46dd87ff /* 0008dbb0 - 0008e3af [ 2048] */, + 0x471d87ff /* 0008e3b0 - 0008ebaf [ 2048] */, + 0x475d87ff /* 0008ebb0 - 0008f3af [ 2048] */, + 0x479d87ff /* 0008f3b0 - 0008fbaf [ 2048] */, + 0x47dd87ff /* 0008fbb0 - 000903af [ 2048] */, + 0x481d87ff /* 000903b0 - 00090baf [ 2048] */, + 0x485d87ff /* 00090bb0 - 000913af [ 2048] */, + 0x489d87ff /* 000913b0 - 00091baf [ 2048] */, + 0x48dd87ff /* 00091bb0 - 000923af [ 2048] */, + 0x491d87ff /* 000923b0 - 00092baf [ 2048] */, + 0x495d87ff /* 00092bb0 - 000933af [ 2048] */, + 0x499d87ff /* 000933b0 - 00093baf [ 2048] */, + 0x49dd87ff /* 00093bb0 - 000943af [ 2048] */, + 0x4a1d87ff /* 000943b0 - 00094baf [ 2048] */, + 0x4a5d87ff /* 00094bb0 - 000953af [ 2048] */, + 0x4a9d87ff /* 000953b0 - 00095baf [ 2048] */, + 0x4add87ff /* 00095bb0 - 000963af [ 2048] */, + 0x4b1d87ff /* 000963b0 - 00096baf [ 2048] */, + 0x4b5d87ff /* 00096bb0 - 000973af [ 2048] */, + 0x4b9d87ff /* 000973b0 - 00097baf [ 2048] */, + 0x4bdd87ff /* 00097bb0 - 000983af [ 2048] */, + 0x4c1d87ff /* 000983b0 - 00098baf [ 2048] */, + 0x4c5d87ff /* 00098bb0 - 000993af [ 2048] */, + 0x4c9d87ff /* 000993b0 - 00099baf [ 2048] */, + 0x4cdd87ff /* 00099bb0 - 0009a3af [ 2048] */, + 0x4d1d87ff /* 0009a3b0 - 0009abaf [ 2048] */, + 0x4d5d87ff /* 0009abb0 - 0009b3af [ 2048] */, + 0x4d9d87ff /* 0009b3b0 - 0009bbaf [ 2048] */, + 0x4ddd87ff /* 0009bbb0 - 0009c3af [ 2048] */, + 
0x4e1d87ff /* 0009c3b0 - 0009cbaf [ 2048] */, + 0x4e5d87ff /* 0009cbb0 - 0009d3af [ 2048] */, + 0x4e9d87ff /* 0009d3b0 - 0009dbaf [ 2048] */, + 0x4edd87ff /* 0009dbb0 - 0009e3af [ 2048] */, + 0x4f1d87ff /* 0009e3b0 - 0009ebaf [ 2048] */, + 0x4f5d87ff /* 0009ebb0 - 0009f3af [ 2048] */, + 0x4f9d87ff /* 0009f3b0 - 0009fbaf [ 2048] */, + 0x4fdd87ff /* 0009fbb0 - 000a03af [ 2048] */, + 0x501d87ff /* 000a03b0 - 000a0baf [ 2048] */, + 0x505d87ff /* 000a0bb0 - 000a13af [ 2048] */, + 0x509d87ff /* 000a13b0 - 000a1baf [ 2048] */, + 0x50dd87ff /* 000a1bb0 - 000a23af [ 2048] */, + 0x511d87ff /* 000a23b0 - 000a2baf [ 2048] */, + 0x515d87ff /* 000a2bb0 - 000a33af [ 2048] */, + 0x519d87ff /* 000a33b0 - 000a3baf [ 2048] */, + 0x51dd87ff /* 000a3bb0 - 000a43af [ 2048] */, + 0x521d87ff /* 000a43b0 - 000a4baf [ 2048] */, + 0x525d87ff /* 000a4bb0 - 000a53af [ 2048] */, + 0x529d87ff /* 000a53b0 - 000a5baf [ 2048] */, + 0x52dd87ff /* 000a5bb0 - 000a63af [ 2048] */, + 0x531d87ff /* 000a63b0 - 000a6baf [ 2048] */, + 0x535d87ff /* 000a6bb0 - 000a73af [ 2048] */, + 0x539d87ff /* 000a73b0 - 000a7baf [ 2048] */, + 0x53dd87ff /* 000a7bb0 - 000a83af [ 2048] */, + 0x541d87ff /* 000a83b0 - 000a8baf [ 2048] */, + 0x545d87ff /* 000a8bb0 - 000a93af [ 2048] */, + 0x549d87ff /* 000a93b0 - 000a9baf [ 2048] */, + 0x54dd87ff /* 000a9bb0 - 000aa3af [ 2048] */, + 0x551d87ff /* 000aa3b0 - 000aabaf [ 2048] */, + 0x555d87ff /* 000aabb0 - 000ab3af [ 2048] */, + 0x559d87ff /* 000ab3b0 - 000abbaf [ 2048] */, + 0x55dd87ff /* 000abbb0 - 000ac3af [ 2048] */, + 0x561d87ff /* 000ac3b0 - 000acbaf [ 2048] */, + 0x565d87ff /* 000acbb0 - 000ad3af [ 2048] */, + 0x569d87ff /* 000ad3b0 - 000adbaf [ 2048] */, + 0x56dd87ff /* 000adbb0 - 000ae3af [ 2048] */, + 0x571d87ff /* 000ae3b0 - 000aebaf [ 2048] */, + 0x575d87ff /* 000aebb0 - 000af3af [ 2048] */, + 0x579d87ff /* 000af3b0 - 000afbaf [ 2048] */, + 0x57dd87ff /* 000afbb0 - 000b03af [ 2048] */, + 0x581d87ff /* 000b03b0 - 000b0baf [ 2048] */, + 0x585d87ff /* 000b0bb0 - 
000b13af [ 2048] */, + 0x589d87ff /* 000b13b0 - 000b1baf [ 2048] */, + 0x58dd87ff /* 000b1bb0 - 000b23af [ 2048] */, + 0x591d87ff /* 000b23b0 - 000b2baf [ 2048] */, + 0x595d87ff /* 000b2bb0 - 000b33af [ 2048] */, + 0x599d87ff /* 000b33b0 - 000b3baf [ 2048] */, + 0x59dd87ff /* 000b3bb0 - 000b43af [ 2048] */, + 0x5a1d87ff /* 000b43b0 - 000b4baf [ 2048] */, + 0x5a5d87ff /* 000b4bb0 - 000b53af [ 2048] */, + 0x5a9d87ff /* 000b53b0 - 000b5baf [ 2048] */, + 0x5add87ff /* 000b5bb0 - 000b63af [ 2048] */, + 0x5b1d87ff /* 000b63b0 - 000b6baf [ 2048] */, + 0x5b5d87ff /* 000b6bb0 - 000b73af [ 2048] */, + 0x5b9d87ff /* 000b73b0 - 000b7baf [ 2048] */, + 0x5bdd87ff /* 000b7bb0 - 000b83af [ 2048] */, + 0x5c1d87ff /* 000b83b0 - 000b8baf [ 2048] */, + 0x5c5d87ff /* 000b8bb0 - 000b93af [ 2048] */, + 0x5c9d87ff /* 000b93b0 - 000b9baf [ 2048] */, + 0x5cdd87ff /* 000b9bb0 - 000ba3af [ 2048] */, + 0x5d1d87ff /* 000ba3b0 - 000babaf [ 2048] */, + 0x5d5d87ff /* 000babb0 - 000bb3af [ 2048] */, + 0x5d9d87ff /* 000bb3b0 - 000bbbaf [ 2048] */, + 0x5ddd87ff /* 000bbbb0 - 000bc3af [ 2048] */, + 0x5e1d87ff /* 000bc3b0 - 000bcbaf [ 2048] */, + 0x5e5d87ff /* 000bcbb0 - 000bd3af [ 2048] */, + 0x5e9d87ff /* 000bd3b0 - 000bdbaf [ 2048] */, + 0x5edd87ff /* 000bdbb0 - 000be3af [ 2048] */, + 0x5f1d87ff /* 000be3b0 - 000bebaf [ 2048] */, + 0x5f5d87ff /* 000bebb0 - 000bf3af [ 2048] */, + 0x5f9d87ff /* 000bf3b0 - 000bfbaf [ 2048] */, + 0x5fdd87ff /* 000bfbb0 - 000c03af [ 2048] */, + 0x601d87ff /* 000c03b0 - 000c0baf [ 2048] */, + 0x605d87ff /* 000c0bb0 - 000c13af [ 2048] */, + 0x609d87ff /* 000c13b0 - 000c1baf [ 2048] */, + 0x60dd87ff /* 000c1bb0 - 000c23af [ 2048] */, + 0x611d87ff /* 000c23b0 - 000c2baf [ 2048] */, + 0x615d87ff /* 000c2bb0 - 000c33af [ 2048] */, + 0x619d87ff /* 000c33b0 - 000c3baf [ 2048] */, + 0x61dd87ff /* 000c3bb0 - 000c43af [ 2048] */, + 0x621d87ff /* 000c43b0 - 000c4baf [ 2048] */, + 0x625d87ff /* 000c4bb0 - 000c53af [ 2048] */, + 0x629d87ff /* 000c53b0 - 000c5baf [ 2048] */, + 
0x62dd87ff /* 000c5bb0 - 000c63af [ 2048] */, + 0x631d87ff /* 000c63b0 - 000c6baf [ 2048] */, + 0x635d87ff /* 000c6bb0 - 000c73af [ 2048] */, + 0x639d87ff /* 000c73b0 - 000c7baf [ 2048] */, + 0x63dd87ff /* 000c7bb0 - 000c83af [ 2048] */, + 0x641d87ff /* 000c83b0 - 000c8baf [ 2048] */, + 0x645d87ff /* 000c8bb0 - 000c93af [ 2048] */, + 0x649d87ff /* 000c93b0 - 000c9baf [ 2048] */, + 0x64dd87ff /* 000c9bb0 - 000ca3af [ 2048] */, + 0x651d87ff /* 000ca3b0 - 000cabaf [ 2048] */, + 0x655d87ff /* 000cabb0 - 000cb3af [ 2048] */, + 0x659d87ff /* 000cb3b0 - 000cbbaf [ 2048] */, + 0x65dd87ff /* 000cbbb0 - 000cc3af [ 2048] */, + 0x661d87ff /* 000cc3b0 - 000ccbaf [ 2048] */, + 0x665d87ff /* 000ccbb0 - 000cd3af [ 2048] */, + 0x669d87ff /* 000cd3b0 - 000cdbaf [ 2048] */, + 0x66dd87ff /* 000cdbb0 - 000ce3af [ 2048] */, + 0x671d87ff /* 000ce3b0 - 000cebaf [ 2048] */, + 0x675d87ff /* 000cebb0 - 000cf3af [ 2048] */, + 0x679d87ff /* 000cf3b0 - 000cfbaf [ 2048] */, + 0x67dd87ff /* 000cfbb0 - 000d03af [ 2048] */, + 0x681d87ff /* 000d03b0 - 000d0baf [ 2048] */, + 0x685d87ff /* 000d0bb0 - 000d13af [ 2048] */, + 0x689d87ff /* 000d13b0 - 000d1baf [ 2048] */, + 0x68dd87ff /* 000d1bb0 - 000d23af [ 2048] */, + 0x691d87ff /* 000d23b0 - 000d2baf [ 2048] */, + 0x695d87ff /* 000d2bb0 - 000d33af [ 2048] */, + 0x699d87ff /* 000d33b0 - 000d3baf [ 2048] */, + 0x69dd87ff /* 000d3bb0 - 000d43af [ 2048] */, + 0x6a1d87ff /* 000d43b0 - 000d4baf [ 2048] */, + 0x6a5d87ff /* 000d4bb0 - 000d53af [ 2048] */, + 0x6a9d87ff /* 000d53b0 - 000d5baf [ 2048] */, + 0x6add87ff /* 000d5bb0 - 000d63af [ 2048] */, + 0x6b1d87ff /* 000d63b0 - 000d6baf [ 2048] */, + 0x6b5d87ff /* 000d6bb0 - 000d73af [ 2048] */, + 0x6b9d87ff /* 000d73b0 - 000d7baf [ 2048] */, + 0x6bdd87ff /* 000d7bb0 - 000d83af [ 2048] */, + 0x6c1d87ff /* 000d83b0 - 000d8baf [ 2048] */, + 0x6c5d87ff /* 000d8bb0 - 000d93af [ 2048] */, + 0x6c9d87ff /* 000d93b0 - 000d9baf [ 2048] */, + 0x6cdd87ff /* 000d9bb0 - 000da3af [ 2048] */, + 0x6d1d87ff /* 000da3b0 - 
000dabaf [ 2048] */, + 0x6d5d87ff /* 000dabb0 - 000db3af [ 2048] */, + 0x6d9d87ff /* 000db3b0 - 000dbbaf [ 2048] */, + 0x6ddd87ff /* 000dbbb0 - 000dc3af [ 2048] */, + 0x6e1d87ff /* 000dc3b0 - 000dcbaf [ 2048] */, + 0x6e5d87ff /* 000dcbb0 - 000dd3af [ 2048] */, + 0x6e9d87ff /* 000dd3b0 - 000ddbaf [ 2048] */, + 0x6edd87ff /* 000ddbb0 - 000de3af [ 2048] */, + 0x6f1d87ff /* 000de3b0 - 000debaf [ 2048] */, + 0x6f5d87ff /* 000debb0 - 000df3af [ 2048] */, + 0x6f9d87ff /* 000df3b0 - 000dfbaf [ 2048] */, + 0x6fdd854f /* 000dfbb0 - 000e00ff [ 1360] */}; /// At the end of the valid Unicode code points space a lot of code points are /// either reserved or a noncharacter. Adding all these entries to the /// lookup table would add 446 entries to the table (in Unicode 14). /// Instead the only the start of the region is stored, every code point in /// this region needs to be escaped. -inline constexpr uint32_t __unallocated_region_lower_bound = 0x000323b0; +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __unallocated_region_lower_bound = 0x000e01f0; /// Returns whether the code unit needs to be escaped. /// diff --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h index 43a074dd8d7002..052ea98c3c3b8c 100644 --- a/libcxx/include/__format/write_escaped.h +++ b/libcxx/include/__format/write_escaped.h @@ -101,15 +101,27 @@ _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_Cha } template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool +__is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) { # ifdef _LIBCPP_HAS_NO_UNICODE // For ASCII assume everything above 127 is printable. 
if (__value > 127) return false; # endif + // [format.string.escaped]/2.2.1.2.1 + // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar + // value whose Unicode property General_Category has a value in the groups + // Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard, if (!__escaped_output_table::__needs_escape(__value)) - return false; + // [format.string.escaped]/2.2.1.2.2 + // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar + // value with the Unicode property Grapheme_Extend=Yes as described by UAX + // #44 of the Unicode Standard and C is not immediately preceded in S by a + // character P appended to E without translation to an escape sequence, + if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) != + __extended_grapheme_custer_property_boundary::__property::__Extend) + return false; __formatter::__write_well_formed_escaped_code_unit(__str, __value); return true; @@ -124,8 +136,8 @@ enum class __escape_quotation_mark { __apostrophe, __double_quote }; // [format.string.escaped]/2 template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool -__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written( + basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) { // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] switch (__value) { case _CharT('\t'): @@ -167,7 +179,7 @@ __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __e // TODO FMT determine what to do with shift sequences. 
// 2.2.1.2.1 and 2.2.1.2.2 - Escape - return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); + return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value)); } template @@ -175,11 +187,15 @@ _LIBCPP_HIDE_FROM_ABI void __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; + // When the first code unit has the property Grapheme_Extend=Yes it needs to + // be escaped. This happens when the previous code unit was also escaped. + bool __escape = true; while (!__view.__at_end()) { auto __first = __view.__position(); typename __unicode::__consume_result __result = __view.__consume(); if (__result.__status == __unicode::__consume_result::__ok) { - if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) + __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark); + if (!__escape) // 2.2.1.3 - Add the character ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); } else { diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf index 7964758c908f4c..a5b4ab9520aedb 100644 --- a/libcxx/include/streambuf +++ b/libcxx/include/streambuf @@ -107,10 +107,12 @@ protected: */ +#include <__assert> #include <__config> #include <__fwd/streambuf.h> #include <__locale> #include <__type_traits/is_same.h> +#include <__utility/is_valid_range.h> #include #include #include @@ -234,6 +236,9 @@ protected: inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; } inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range"); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), 
"[gbeg, gend) must be a valid range"); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gnext, __gend), "[gnext, gend) must be a valid range"); __binp_ = __gbeg; __ninp_ = __gnext; __einp_ = __gend; @@ -249,6 +254,7 @@ protected: _LIBCPP_HIDE_FROM_ABI void __pbump(streamsize __n) { __nout_ += __n; } inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setp(char_type* __pbeg, char_type* __pend) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__pbeg, __pend), "[pbeg, pend) must be a valid range"); __bout_ = __nout_ = __pbeg; __eout_ = __pend; } diff --git a/libcxx/include/variant b/libcxx/include/variant index 858a49b980bd9a..34150bd452842e 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -657,6 +657,10 @@ private: } // namespace __visitation +// Adding semi-colons in macro expansions helps clang-format to do a better job. +// This macro is used to avoid compilation errors due to "stray" semi-colons. +# define _LIBCPP_EAT_SEMICOLON static_assert(true, "") + template struct _LIBCPP_TEMPLATE_VIS __alt { using __value_type = _Tp; @@ -691,11 +695,10 @@ union _LIBCPP_TEMPLATE_VIS __union<_DestructibleTrait, _Index> {}; __union(const __union&) = default; \ __union(__union&&) = default; \ \ - destructor \ + destructor; \ \ - __union& \ - operator=(const __union&) = default; \ - __union& operator=(__union&&) = default; \ + __union& operator=(const __union&) = default; \ + __union& operator=(__union&&) = default; \ \ private: \ char __dummy; \ @@ -705,9 +708,10 @@ union _LIBCPP_TEMPLATE_VIS __union<_DestructibleTrait, _Index> {}; friend struct __access::__union; \ } -_LIBCPP_VARIANT_UNION(_Trait::_TriviallyAvailable, ~__union() = default;); -_LIBCPP_VARIANT_UNION(_Trait::_Available, ~__union(){}); -_LIBCPP_VARIANT_UNION(_Trait::_Unavailable, ~__union() = delete;); +_LIBCPP_VARIANT_UNION(_Trait::_TriviallyAvailable, ~__union() = default); +_LIBCPP_VARIANT_UNION( + _Trait::_Available, _LIBCPP_HIDE_FROM_ABI ~__union() {} _LIBCPP_EAT_SEMICOLON); 
+_LIBCPP_VARIANT_UNION(_Trait::_Unavailable, ~__union() = delete); # undef _LIBCPP_VARIANT_UNION @@ -761,23 +765,27 @@ class _LIBCPP_TEMPLATE_VIS __dtor; using __base_type::__base_type; \ using __base_type::operator=; \ \ - __dtor(const __dtor&) = default; \ - __dtor(__dtor&&) = default; \ - destructor __dtor& operator=(const __dtor&) = default; \ - __dtor& operator=(__dtor&&) = default; \ + __dtor(const __dtor&) = default; \ + __dtor(__dtor&&) = default; \ + __dtor& operator=(const __dtor&) = default; \ + __dtor& operator=(__dtor&&) = default; \ + destructor; \ \ protected: \ - inline _LIBCPP_HIDE_FROM_ABI destroy \ + inline _LIBCPP_HIDE_FROM_ABI destroy; \ } _LIBCPP_VARIANT_DESTRUCTOR( - _Trait::_TriviallyAvailable, ~__dtor() = default; - , void __destroy() noexcept { this->__index = __variant_npos<__index_t>; }); + _Trait::_TriviallyAvailable, + ~__dtor() = default, // + _LIBCPP_HIDE_FROM_ABI void __destroy() noexcept { + this->__index = __variant_npos<__index_t>; + } _LIBCPP_EAT_SEMICOLON); _LIBCPP_VARIANT_DESTRUCTOR( _Trait::_Available, - ~__dtor() { __destroy(); }, - void __destroy() noexcept { + _LIBCPP_HIDE_FROM_ABI ~__dtor() { __destroy(); } _LIBCPP_EAT_SEMICOLON, + _LIBCPP_HIDE_FROM_ABI void __destroy() noexcept { if (!this->valueless_by_exception()) { __visitation::__base::__visit_alt( [](auto& __alt) noexcept { @@ -787,9 +795,9 @@ _LIBCPP_VARIANT_DESTRUCTOR( *this); } this->__index = __variant_npos<__index_t>; - }); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_DESTRUCTOR(_Trait::_Unavailable, ~__dtor() = delete;, void __destroy() noexcept = delete;); +_LIBCPP_VARIANT_DESTRUCTOR(_Trait::_Unavailable, ~__dtor() = delete, void __destroy() noexcept = delete); # undef _LIBCPP_VARIANT_DESTRUCTOR @@ -839,20 +847,24 @@ class _LIBCPP_TEMPLATE_VIS __move_constructor; using __base_type::operator=; \ \ __move_constructor(const __move_constructor&) = default; \ - move_constructor ~__move_constructor() = default; \ + ~__move_constructor() = default; \ 
__move_constructor& operator=(const __move_constructor&) = default; \ __move_constructor& operator=(__move_constructor&&) = default; \ + move_constructor; \ } _LIBCPP_VARIANT_MOVE_CONSTRUCTOR(_Trait::_TriviallyAvailable, - __move_constructor(__move_constructor&& __that) = default;); + __move_constructor(__move_constructor&& __that) = default); _LIBCPP_VARIANT_MOVE_CONSTRUCTOR( _Trait::_Available, - __move_constructor(__move_constructor&& __that) noexcept(__all...>::value) - : __move_constructor(__valueless_t{}) { this->__generic_construct(*this, std::move(__that)); }); + _LIBCPP_HIDE_FROM_ABI __move_constructor(__move_constructor&& __that) noexcept( + __all...>::value) + : __move_constructor(__valueless_t{}) { + this->__generic_construct(*this, std::move(__that)); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_MOVE_CONSTRUCTOR(_Trait::_Unavailable, __move_constructor(__move_constructor&&) = delete;); +_LIBCPP_VARIANT_MOVE_CONSTRUCTOR(_Trait::_Unavailable, __move_constructor(__move_constructor&&) = delete); # undef _LIBCPP_VARIANT_MOVE_CONSTRUCTOR @@ -869,20 +881,21 @@ class _LIBCPP_TEMPLATE_VIS __copy_constructor; using __base_type::__base_type; \ using __base_type::operator=; \ \ - copy_constructor __copy_constructor(__copy_constructor&&) = default; \ - ~__copy_constructor() = default; \ - __copy_constructor& operator=(const __copy_constructor&) = default; \ - __copy_constructor& operator=(__copy_constructor&&) = default; \ - } + __copy_constructor(__copy_constructor&&) = default; \ + ~__copy_constructor() = default; \ + __copy_constructor& operator=(const __copy_constructor&) = default; \ + __copy_constructor& operator=(__copy_constructor&&) = default; \ + copy_constructor; \ + } // namespace __variant_detail _LIBCPP_VARIANT_COPY_CONSTRUCTOR(_Trait::_TriviallyAvailable, - __copy_constructor(const __copy_constructor& __that) = default;); + __copy_constructor(const __copy_constructor& __that) = default); _LIBCPP_VARIANT_COPY_CONSTRUCTOR( - _Trait::_Available, 
__copy_constructor(const __copy_constructor& __that) - : __copy_constructor(__valueless_t{}) { this->__generic_construct(*this, __that); }); + _Trait::_Available, _LIBCPP_HIDE_FROM_ABI __copy_constructor(const __copy_constructor& __that) + : __copy_constructor(__valueless_t{}) { this->__generic_construct(*this, __that); } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_COPY_CONSTRUCTOR(_Trait::_Unavailable, __copy_constructor(const __copy_constructor&) = delete;); +_LIBCPP_VARIANT_COPY_CONSTRUCTOR(_Trait::_Unavailable, __copy_constructor(const __copy_constructor&) = delete); # undef _LIBCPP_VARIANT_COPY_CONSTRUCTOR @@ -955,22 +968,22 @@ class _LIBCPP_TEMPLATE_VIS __move_assignment; __move_assignment(__move_assignment&&) = default; \ ~__move_assignment() = default; \ __move_assignment& operator=(const __move_assignment&) = default; \ - move_assignment \ + move_assignment; \ } _LIBCPP_VARIANT_MOVE_ASSIGNMENT(_Trait::_TriviallyAvailable, - __move_assignment& operator=(__move_assignment&& __that) = default;); + __move_assignment& operator=(__move_assignment&& __that) = default); _LIBCPP_VARIANT_MOVE_ASSIGNMENT( _Trait::_Available, - __move_assignment& + _LIBCPP_HIDE_FROM_ABI __move_assignment& operator=(__move_assignment&& __that) noexcept( __all<(is_nothrow_move_constructible_v<_Types> && is_nothrow_move_assignable_v<_Types>)...>::value) { this->__generic_assign(std::move(__that)); return *this; - }); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_MOVE_ASSIGNMENT(_Trait::_Unavailable, __move_assignment& operator=(__move_assignment&&) = delete;); +_LIBCPP_VARIANT_MOVE_ASSIGNMENT(_Trait::_Unavailable, __move_assignment& operator=(__move_assignment&&) = delete); # undef _LIBCPP_VARIANT_MOVE_ASSIGNMENT @@ -987,22 +1000,23 @@ class _LIBCPP_TEMPLATE_VIS __copy_assignment; using __base_type::__base_type; \ using __base_type::operator=; \ \ - __copy_assignment(const __copy_assignment&) = default; \ - __copy_assignment(__copy_assignment&&) = default; \ - ~__copy_assignment() = default; 
\ - copy_assignment __copy_assignment& operator=(__copy_assignment&&) = default; \ + __copy_assignment(const __copy_assignment&) = default; \ + __copy_assignment(__copy_assignment&&) = default; \ + ~__copy_assignment() = default; \ + __copy_assignment& operator=(__copy_assignment&&) = default; \ + copy_assignment; \ } _LIBCPP_VARIANT_COPY_ASSIGNMENT(_Trait::_TriviallyAvailable, - __copy_assignment& operator=(const __copy_assignment& __that) = default;); + __copy_assignment& operator=(const __copy_assignment& __that) = default); _LIBCPP_VARIANT_COPY_ASSIGNMENT( - _Trait::_Available, __copy_assignment& operator=(const __copy_assignment& __that) { + _Trait::_Available, _LIBCPP_HIDE_FROM_ABI __copy_assignment& operator=(const __copy_assignment& __that) { this->__generic_assign(__that); return *this; - }); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_COPY_ASSIGNMENT(_Trait::_Unavailable, __copy_assignment& operator=(const __copy_assignment&) = delete;); +_LIBCPP_VARIANT_COPY_ASSIGNMENT(_Trait::_Unavailable, __copy_assignment& operator=(const __copy_assignment&) = delete); # undef _LIBCPP_VARIANT_COPY_ASSIGNMENT diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp index 7350c1ddf0e901..4119c39772e564 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // https://github.com/llvm/llvm-project/issues/72893 diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp index 
84dcde5f2784f2..2460765a3c860c 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // https://github.com/llvm/llvm-project/issues/72893 diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp index 386a393e355039..4bd303022c0dad 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // floating-point-type operator-=(floating-point-type) volatile noexcept; diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp index afd06d537c7adb..69abb9ae63c38b 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // floating-point-type operator+=(floating-point-type) volatile noexcept; diff --git 
a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp index 58067511950703..b458f93601a1ba 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp @@ -57,18 +57,20 @@ int main(int, char**) test t2 = t; } { - char g1, g2, g3, p1, p3; + char g[3]; + char p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2 = t; } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { - wchar_t g1, g2, g3, p1, p3; + wchar_t g[3]; + wchar_t p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2 = t; } { diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp index 8a976e77f0f13f..45a8cdf3a23fea 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp @@ -59,10 +59,11 @@ int main(int, char**) t2 = t; } { - char g1, g2, g3, p1, p3; + char g[3]; + char p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2 = t; } @@ -73,10 +74,11 @@ int main(int, char**) t2 = t; } { - wchar_t g1, g2, g3, p1, p3; + wchar_t g[3]; + wchar_t p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2 = t; } diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp 
b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp index c575c2cb12711a..b90c4c053c9155 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp @@ -68,10 +68,11 @@ int main(int, char**) t2.swap(t); } { - char g1, g2, g3, p1, p3; + char g[3]; + char p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2.swap(t); } @@ -82,10 +83,11 @@ int main(int, char**) t2.swap(t); } { - wchar_t g1, g2, g3, p1, p3; + wchar_t g[3]; + wchar_t p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2.swap(t); } diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.assert.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.assert.pass.cpp new file mode 100644 index 00000000000000..becf89b12fdd18 --- /dev/null +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.assert.pass.cpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// UNSUPPORTED: libcpp-hardening-mode=none +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +// + +// template > +// class basic_streambuf; + +// void setg(char_type* gbeg, char_type* gnext, char_type* gend); + +#include +#include +#include +#include + +#include "check_assertion.h" +#include "make_string.h" +#include "test_macros.h" + +template +struct streambuf : public std::basic_streambuf { + typedef std::basic_streambuf base; + + streambuf() {} + + void setg(CharT* gbeg, CharT* gnext, CharT* gend) { base::setg(gbeg, gnext, gend); } +}; + +template +void test() { + std::basic_string str = MAKE_STRING(CharT, "ABCDEF"); + CharT arr[6]; + std::copy(str.begin(), str.end(), arr); + + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE( + buff.setg(std::begin(arr) + 1, std::begin(arr), std::end(arr)), "[gbeg, gnext) must be a valid range"); + } + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE( + buff.setg(std::begin(arr) + 1, std::begin(arr) + 1, std::begin(arr)), "[gbeg, gend) must be a valid range"); + } + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE( + buff.setg(std::begin(arr), std::begin(arr) + 3, std::begin(arr) + 2), "[gnext, gend) must be a valid range"); + } +} + +int main(int, char**) { + test(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + + return 0; +} diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.assert.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.assert.pass.cpp new file mode 100644 index 00000000000000..abd42272de508c --- /dev/null +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.assert.pass.cpp @@ -0,0 +1,57 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// UNSUPPORTED: libcpp-hardening-mode=none +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +// + +// template > +// class basic_streambuf; + +// void setp(char_type* pbeg, char_type* pend); + +#include +#include +#include +#include + +#include "check_assertion.h" +#include "make_string.h" +#include "test_macros.h" + +template +struct streambuf : public std::basic_streambuf { + typedef std::basic_streambuf base; + + streambuf() {} + + void setp(CharT* pbeg, CharT* pend) { base::setp(pbeg, pend); } +}; + +template +void test() { + std::basic_string str = MAKE_STRING(CharT, "ABCDEF"); + CharT arr[6]; + std::copy(str.begin(), str.end(), arr); + + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE(buff.setp(std::begin(arr) + 3, std::begin(arr)), "[pbeg, pend) must be a valid range"); + } +} + +int main(int, char**) { + test(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + + return 0; +} diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index bf5c0a51f944ae..96c1e2664f7a64 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -223,7 +223,7 @@ void test_char() { static_assert(sizeof(CharT) == 4, "add support for unexpected size"); // Unicode fitting in a 32-bit wchar_t - constexpr wchar_t x = 0x1ffff; + constexpr wchar_t x = 0x1ffff; constexpr std::uint32_t y = 0x1ffff; 
static_assert(x == y); @@ -290,7 +290,7 @@ void test_string() { test_format(SV("[\"\ud7ff\"]"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid #else /* U+D800..D+DFFFF surrogate range */ - test_format(SV(R"(["\u{d7ff}"])"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid + test_format(SV(R"(["\u{d7ff}"])"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid #endif test_format(SV(R"(["\x{ed}\x{a0}\x{80}"])"), SV("[{:?}]"), "\xed\xa0\x80"); // U+D800 test_format(SV(R"(["\x{ed}\x{af}\x{bf}"])"), SV("[{:?}]"), "\xed\xaf\xbf"); // U+DBFF @@ -319,7 +319,8 @@ void test_string() { test_format(SV("[\"\u00c3(\"]"), SV("[{:?}]"), L"\xc3\x28"); } - test_format(SV(R"(["🤷🏻\u{200d}♂\u{fe0f}"])"), SV("[{:?}]"), SV("🤷🏻‍♂️")); + // LWG-3965 + test_format(SV(R"(["🤷🏻\u{200d}♂️"])"), SV("[{:?}]"), SV("🤷🏻‍♂️")); // *** Special cases *** test_format(SV(R"("\t\n\r\\'\" ")"), SV("{:?}"), SV("\t\n\r\\'\" ")); @@ -336,6 +337,11 @@ void test_string() { if constexpr (sizeof(CharT) == 1) test_format(SV(R"("\x{80}")"), SV("{:?}"), SV("\x80")); + // *** P2713R1 examples *** + test_format(SV(R"(["\u{301}"])"), SV("[{:?}]"), SV("\u0301")); + test_format(SV(R"(["\\\u{301}"])"), SV("[{:?}]"), SV("\\\u0301")); + test_format(SV(R"(["ẹ́"])"), SV("[{:?}]"), SV("e\u0301\u0323")); + #ifndef TEST_HAS_NO_WIDE_CHARACTERS if constexpr (sizeof(CharT) > 1) { using V = std::basic_string_view; @@ -373,7 +379,7 @@ void test_string() { static_assert(sizeof(CharT) == 4, "add support for unexpected size"); // Unicode fitting in a 32-bit wchar_t - constexpr wchar_t x = 0x1ffff; + constexpr wchar_t x = 0x1ffff; constexpr std::uint32_t y = 0x1ffff; static_assert(x == y); @@ -406,20 +412,18 @@ void test_format_functions(TestFunction check) { check(SV(R"(*"hellö"**)"), SV("{:*^10?}"), SV("hellö")); check(SV(R"("hellö"***)"), SV("{:*<10?}"), SV("hellö")); - check(SV(R"("hello\u{308}")"), SV("{:*>10?}"), SV("hello\u0308")); - check(SV(R"(***"hello\u{308}")"), SV("{:*>17?}"), SV("hello\u0308")); - 
check(SV(R"(*"hello\u{308}"**)"), SV("{:*^17?}"), SV("hello\u0308")); - check(SV(R"("hello\u{308}"***)"), SV("{:*<17?}"), SV("hello\u0308")); + check(SV(R"(***"hellö")"), SV("{:*>10?}"), SV("hello\u0308")); + check(SV(R"(*"hellö"**)"), SV("{:*^10?}"), SV("hello\u0308")); + check(SV(R"("hellö"***)"), SV("{:*<10?}"), SV("hello\u0308")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}")"), SV("{:*>10?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"(***"hello 🤷🏻\u{200d}♂\u{fe0f}")"), SV("{:*>30?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"(*"hello 🤷🏻\u{200d}♂\u{fe0f}"**)"), SV("{:*^30?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}"***)"), SV("{:*<30?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"(***"hello 🤷🏻\u{200d}♂️")"), SV("{:*>22?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"(*"hello 🤷🏻\u{200d}♂️"**)"), SV("{:*^22?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️"***)"), SV("{:*<22?}"), SV("hello 🤷🏻‍♂️")); // *** width *** check(SV(R"("hellö" )"), SV("{:10?}"), SV("hellö")); - check(SV(R"("hello\u{308}" )"), SV("{:17?}"), SV("hello\u0308")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}" )"), SV("{:30?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hellö" )"), SV("{:10?}"), SV("hello\u0308")); + check(SV(R"("hello 🤷🏻\u{200d}♂️" )"), SV("{:22?}"), SV("hello 🤷🏻‍♂️")); // *** precision *** check(SV(R"("hell)"), SV("{:.5?}"), SV("hellö")); @@ -431,9 +435,8 @@ void test_format_functions(TestFunction check) { check(SV(R"("hello 🤷🏻)"), SV("{:.9?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\)"), SV("{:.10?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\u{200d})"), SV("{:.17?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂)"), SV("{:.18?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\)"), SV("{:.19?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}")"), SV("{:.28?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️)"), SV("{:.18?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️")"), SV("{:.19?}"), SV("hello 🤷🏻‍♂️")); // *** width & precision *** 
check(SV(R"("hell#########################)"), SV("{:#<30.5?}"), SV("hellö")); @@ -445,9 +448,8 @@ void test_format_functions(TestFunction check) { check(SV(R"("hello 🤷🏻#####################)"), SV("{:#<30.9?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\####################)"), SV("{:#<30.10?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\u{200d}#############)"), SV("{:#<30.17?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂############)"), SV("{:#<30.18?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\###########)"), SV("{:#<30.19?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}"###)"), SV("{:#<30.28?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️############)"), SV("{:#<30.18?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️"###########)"), SV("{:#<30.19?}"), SV("hello 🤷🏻‍♂️")); } template diff --git a/libcxx/utils/generate_escaped_output_table.py b/libcxx/utils/generate_escaped_output_table.py index c6bde8f2411cf6..a11ce259096d5a 100755 --- a/libcxx/utils/generate_escaped_output_table.py +++ b/libcxx/utils/generate_escaped_output_table.py @@ -39,12 +39,6 @@ class Entry: ) -def filterCoreProperty(element: PropertyRange) -> Optional[PropertyRange]: - if element.prop == "Grapheme_Extend": - return element - return None - - # https://www.unicode.org/reports/tr44/#GC_Values_Table def filterGeneralProperty(element: PropertyRange) -> Optional[PropertyRange]: if element.prop in ["Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn"]: @@ -94,10 +88,9 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: /// The entries of the characters to escape in format's debug string. 
/// /// Contains the entries for [format.string.escaped]/2.2.1.2.1 -/// CE is a Unicode encoding and C corresponds to either a UCS scalar value -/// whose Unicode property General_Category has a value in the groups -/// Separator (Z) or Other (C) or to a UCS scalar value which has the Unicode -/// property Grapheme_Extend=Yes, as described by table 12 of UAX #44 +/// CE is a Unicode encoding and C corresponds to a UCS scalar value whose +/// Unicode property General_Category has a value in the groups Separator (Z) +/// or Other (C), as described by table 12 of UAX #44 /// /// Separator (Z) consists of General_Category /// - Space_Separator, @@ -112,7 +105,6 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: /// - Unassigned. /// /// The data is generated from -/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// - https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt /// /// The table is similar to the table @@ -132,7 +124,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: /// lookup table would add 446 entries to the table (in Unicode 14). /// Instead the only the start of the region is stored, every code point in /// this region needs to be escaped. -inline constexpr uint32_t __unallocated_region_lower_bound = 0x{unallocated:08x}; +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __unallocated_region_lower_bound = 0x{unallocated:08x}; /// Returns whether the code unit needs to be escaped. 
/// @@ -262,7 +254,7 @@ def property_ranges_to_table(ranges: list[PropertyRange]) -> list[Entry]: return result -cpp_entrytemplate = " 0x{:08x}" +cpp_entrytemplate = " 0x{:08x} /* {:08x} - {:08x} [{:>5}] */" def generate_cpp_data(ranges: list[PropertyRange], unallocated: int) -> str: @@ -272,7 +264,15 @@ def generate_cpp_data(ranges: list[PropertyRange], unallocated: int) -> str: DATA_ARRAY_TEMPLATE.format( size=len(table), entries=",\n".join( - [cpp_entrytemplate.format(x.lower << 11 | x.offset) for x in table] + [ + cpp_entrytemplate.format( + x.lower << 11 | x.offset, + x.lower, + x.lower + x.offset, + x.offset + 1, + ) + for x in table + ] ), unallocated=unallocated, ) @@ -291,12 +291,6 @@ def generate_data_tables() -> str: / "unicode" / "DerivedGeneralCategory.txt" ) - derived_core_catagory_path = ( - Path(__file__).absolute().parent - / "data" - / "unicode" - / "DerivedCoreProperties.txt" - ) properties = list() with derived_general_catagory_path.open(encoding="utf-8") as f: @@ -308,15 +302,6 @@ def generate_data_tables() -> str: ) ) ) - with derived_core_catagory_path.open(encoding="utf-8") as f: - properties.extend( - list( - filter( - filterCoreProperty, - [x for line in f if (x := parsePropertyLine(line))], - ) - ) - ) data = compactPropertyRanges(sorted(properties, key=lambda x: x.lower)) @@ -328,8 +313,12 @@ def generate_data_tables() -> str: # # When this region becomes substantially smaller we need to investigate # this design. + # + # Due to P2713R1 Escaping improvements in std::format the range + # E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + # is no longer part of these entries. This causes an increase in the size + # of the table. 
assert data[-1].upper == 0x10FFFF - assert data[-1].upper - data[-1].lower > 900000 return "\n".join([generate_cpp_data(data[:-1], data[-1].lower)]) diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 20088d92bafa2f..7b9c9c6c6c3852 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -1084,10 +1084,62 @@ static void mergeArch(RISCVISAInfo::OrderedExtensionMap &mergedExts, } } +static void mergeAtomic(DenseMap::iterator it, + const InputSectionBase *oldSection, + const InputSectionBase *newSection, unsigned int oldTag, + unsigned int newTag) { + using RISCVAttrs::RISCVAtomicAbiTag::AtomicABI; + // Same tags stay the same, and UNKNOWN is compatible with anything + if (oldTag == newTag || newTag == AtomicABI::UNKNOWN) + return; + + switch (oldTag) { + case AtomicABI::UNKNOWN: + it->getSecond() = newTag; + return; + case AtomicABI::A6C: + switch (newTag) { + case AtomicABI::A6S: + it->getSecond() = AtomicABI::A6C; + return; + case AtomicABI::A7: + error(toString(oldSection) + " has atomic_abi=" + Twine(oldTag) + + " but " + toString(newSection) + + " has atomic_abi=" + Twine(newTag)); + return; + }; + + case AtomicABI::A6S: + switch (newTag) { + case AtomicABI::A6C: + it->getSecond() = AtomicABI::A6C; + return; + case AtomicABI::A7: + it->getSecond() = AtomicABI::A7; + return; + }; + + case AtomicABI::A7: + switch (newTag) { + case AtomicABI::A6S: + it->getSecond() = AtomicABI::A7; + return; + case AtomicABI::A6C: + error(toString(oldSection) + " has atomic_abi=" + Twine(oldTag) + + " but " + toString(newSection) + + " has atomic_abi=" + Twine(newTag)); + return; + }; + default: + llvm_unreachable("unknown AtomicABI"); + }; +} + static RISCVAttributesSection * mergeAttributesSection(const SmallVector §ions) { RISCVISAInfo::OrderedExtensionMap exts; const InputSectionBase *firstStackAlign = nullptr; + const InputSectionBase *firstAtomicAbi = nullptr; unsigned firstStackAlignValue = 0, xlen = 0; bool hasArch = false; @@ -1134,6 +1186,17 @@ 
mergeAttributesSection(const SmallVector §ions) { case RISCVAttrs::PRIV_SPEC_MINOR: case RISCVAttrs::PRIV_SPEC_REVISION: break; + + case llvm::RISCVAttrs::AttrType::ATOMIC_ABI: + if (auto i = parser.getAttributeValue(tag.attr)) { + auto r = merged.intAttr.try_emplace(tag.attr, *i); + if (r.second) { + firstAtomicAbi = sec; + } else { + mergeAtomic(r.first, firstAtomicAbi, sec, r.first->getSecond(), *i); + } + } + continue; } // Fallback for deprecated priv_spec* and other unknown attributes: retain diff --git a/lld/test/ELF/mips-eh_frame-pic.s b/lld/test/ELF/mips-eh_frame-pic.s index c04dbdf57b08ad..a84c36b0e5ecdb 100644 --- a/lld/test/ELF/mips-eh_frame-pic.s +++ b/lld/test/ELF/mips-eh_frame-pic.s @@ -36,8 +36,8 @@ # RELOCS: .rel{{a?}}.eh_frame { # ABS32-RELOCS-NEXT: 0x1C R_MIPS_32 .text # ABS64-RELOCS-NEXT: 0x1C R_MIPS_64/R_MIPS_NONE/R_MIPS_NONE .text -# PIC64-RELOCS-NEXT: 0x1C R_MIPS_PC32/R_MIPS_NONE/R_MIPS_NONE .L0 -# PIC32-RELOCS-NEXT: 0x1C R_MIPS_PC32 .L0 +# PIC64-RELOCS-NEXT: 0x1C R_MIPS_PC32/R_MIPS_NONE/R_MIPS_NONE +# PIC32-RELOCS-NEXT: 0x1C R_MIPS_PC32 # RELOCS-NEXT: } # ABS64-EH-FRAME: Augmentation data: 0C diff --git a/lld/test/ELF/riscv-attributes.s b/lld/test/ELF/riscv-attributes.s index d0ce0941269ec4..77c2c3cb263fd1 100644 --- a/lld/test/ELF/riscv-attributes.s +++ b/lld/test/ELF/riscv-attributes.s @@ -44,6 +44,39 @@ # RUN: not ld.lld a.o b.o c.o diff_stack_align.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=STACK_ALIGN --implicit-check-not=error: # STACK_ALIGN: error: diff_stack_align.o:(.riscv.attributes) has stack_align=32 but a.o:(.riscv.attributes) has stack_align=16 +## merging atomic_abi values for A6C and A7 lead to an error. 
+# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A6C.s -o atomic_abi_A6C.o +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A7.s -o atomic_abi_A7.o +# RUN: not ld.lld atomic_abi_A6C.o atomic_abi_A7.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ATOMIC_ABI_ERROR --implicit-check-not=error: +# ATOMIC_ABI_ERROR: error: atomic_abi_A6C.o:(.riscv.attributes) has atomic_abi=1 but atomic_abi_A7.o:(.riscv.attributes) has atomic_abi=3 + + +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A6S.s -o atomic_abi_A6S.o +# RUN: ld.lld atomic_abi_A6S.o atomic_abi_A6C.o -o atomic_abi_A6C_A6S +# RUN: llvm-readobj -A atomic_abi_A6C_A6S | FileCheck %s --check-prefix=A6C_A6S + +# RUN: ld.lld atomic_abi_A6S.o atomic_abi_A7.o -o atomic_abi_A6S_A7 +# RUN: llvm-readobj -A atomic_abi_A6S_A7 | FileCheck %s --check-prefix=A6S_A7 + +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_unknown.s -o atomic_abi_unknown.o +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A6C.o -o atomic_abi_A6C_unknown +# RUN: llvm-readobj -A atomic_abi_A6C_unknown | FileCheck %s --check-prefixes=UNKNOWN_A6C + +# RUN: ld.lld atomic_abi_unknown.o diff_stack_align.o -o atomic_abi_none_unknown +# RUN: llvm-readobj -A atomic_abi_none_unknown | FileCheck %s --check-prefixes=UNKNOWN_NONE + +# RUN: ld.lld diff_stack_align.o atomic_abi_A6C.o -o atomic_abi_A6C_none +# RUN: llvm-readobj -A atomic_abi_A6C_none | FileCheck %s --check-prefixes=NONE_A6C + +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A6S.o -o atomic_abi_A6S_unknown +# RUN: llvm-readobj -A atomic_abi_A6S_unknown | FileCheck %s --check-prefix=UNKNOWN_A6S + +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A7.o -o atomic_abi_A7_unknown +# RUN: llvm-readobj -A atomic_abi_A7_unknown | FileCheck %s --check-prefix=UNKNOWN_A7 + +# RUN: ld.lld diff_stack_align.o atomic_abi_A7.o -o atomic_abi_A7_none +# RUN: llvm-readobj -A atomic_abi_A7_none | FileCheck %s --check-prefix=NONE_A7 + ## The deprecated priv_spec is not handled as GNU ld does. 
## Differing priv_spec attributes lead to an absent attribute. # RUN: llvm-mc -filetype=obj -triple=riscv64 diff_priv_spec.s -o diff_priv_spec.o @@ -286,6 +319,175 @@ .attribute priv_spec, 3 .attribute priv_spec_minor, 3 +#--- atomic_abi_unknown.s +.attribute atomic_abi, 0 + +#--- atomic_abi_A6C.s +.attribute atomic_abi, 1 + +#--- atomic_abi_A6S.s +.attribute atomic_abi, 2 + +#--- atomic_abi_A7.s +.attribute atomic_abi, 3 + +# UNKNOWN_NONE: BuildAttributes { +# UNKNOWN_NONE-NEXT: FormatVersion: 0x41 +# UNKNOWN_NONE-NEXT: Section 1 { +# UNKNOWN_NONE-NEXT: SectionLength: 17 +# UNKNOWN_NONE-NEXT: Vendor: riscv +# UNKNOWN_NONE-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_NONE-NEXT: Size: 7 +# UNKNOWN_NONE-NEXT: FileAttributes { +# UNKNOWN_NONE-NEXT: Attribute { +# UNKNOWN_NONE-NEXT: Tag: 4 +# UNKNOWN_NONE-NEXT: Value: 32 +# UNKNOWN_NONE-NEXT: TagName: stack_align +# UNKNOWN_NONE-NEXT: Description: Stack alignment is 32-bytes +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } + +# NONE_A6C: BuildAttributes { +# NONE_A6C-NEXT: FormatVersion: 0x41 +# NONE_A6C-NEXT: Section 1 { +# NONE_A6C-NEXT: SectionLength: 19 +# NONE_A6C-NEXT: Vendor: riscv +# NONE_A6C-NEXT: Tag: Tag_File (0x1) +# NONE_A6C-NEXT: Size: 9 +# NONE_A6C-NEXT: FileAttributes { +# NONE_A6C-NEXT: Attribute { +# NONE_A6C-NEXT: Tag: 14 +# NONE_A6C-NEXT: Value: 1 +# NONE_A6C-NEXT: TagName: atomic_abi +# NONE_A6C-NEXT: Description: Atomic ABI is 1 +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: Attribute { +# NONE_A6C-NEXT: Tag: 4 +# NONE_A6C-NEXT: Value: 32 +# NONE_A6C-NEXT: TagName: stack_align +# NONE_A6C-NEXT: Description: Stack alignment is 32-bytes +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } + +# UNKNOWN_A6C: BuildAttributes { +# UNKNOWN_A6C-NEXT: FormatVersion: 0x41 +# UNKNOWN_A6C-NEXT: Section 1 { +# UNKNOWN_A6C-NEXT: SectionLength: 17 +# UNKNOWN_A6C-NEXT: Vendor: riscv +# UNKNOWN_A6C-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A6C-NEXT: Size: 7 +# 
UNKNOWN_A6C-NEXT: FileAttributes { +# UNKNOWN_A6C-NEXT: Attribute { +# UNKNOWN_A6C-NEXT: Tag: 14 +# UNKNOWN_A6C-NEXT: Value: 1 +# UNKNOWN_A6C-NEXT: TagName: atomic_abi +# UNKNOWN_A6C-NEXT: Description: Atomic ABI is 1 +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } + +# UNKNOWN_A6S: BuildAttributes { +# UNKNOWN_A6S-NEXT: FormatVersion: 0x41 +# UNKNOWN_A6S-NEXT: Section 1 { +# UNKNOWN_A6S-NEXT: SectionLength: +# UNKNOWN_A6S-NEXT: Vendor: riscv +# UNKNOWN_A6S-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A6S-NEXT: Size: 7 +# UNKNOWN_A6S-NEXT: FileAttributes { +# UNKNOWN_A6S-NEXT: Attribute { +# UNKNOWN_A6S-NEXT: Tag: 14 +# UNKNOWN_A6S-NEXT: Value: 2 +# UNKNOWN_A6S-NEXT: TagName: atomic_abi +# UNKNOWN_A6S-NEXT: Description: Atomic ABI is 2 +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } + +# NONE_A7: BuildAttributes { +# NONE_A7-NEXT: FormatVersion: 0x41 +# NONE_A7-NEXT: Section 1 { +# NONE_A7-NEXT: SectionLength: 19 +# NONE_A7-NEXT: Vendor: riscv +# NONE_A7-NEXT: Tag: Tag_File (0x1) +# NONE_A7-NEXT: Size: 9 +# NONE_A7-NEXT: FileAttributes { +# NONE_A7-NEXT: Attribute { +# NONE_A7-NEXT: Tag: 14 +# NONE_A7-NEXT: Value: 3 +# NONE_A7-NEXT: TagName: atomic_abi +# NONE_A7-NEXT: Description: Atomic ABI is 3 +# NONE_A7-NEXT: } +# NONE_A7-NEXT: Attribute { +# NONE_A7-NEXT: Tag: 4 +# NONE_A7-NEXT: Value: 32 +# NONE_A7-NEXT: TagName: stack_align +# NONE_A7-NEXT: Description: Stack alignment is 32-bytes +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } + + +# UNKNOWN_A7: BuildAttributes { +# UNKNOWN_A7-NEXT: FormatVersion: 0x41 +# UNKNOWN_A7-NEXT: Section 1 { +# UNKNOWN_A7-NEXT: SectionLength: 17 +# UNKNOWN_A7-NEXT: Vendor: riscv +# UNKNOWN_A7-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A7-NEXT: Size: 7 +# UNKNOWN_A7-NEXT: FileAttributes { +# UNKNOWN_A7-NEXT: Attribute { +# UNKNOWN_A7-NEXT: Tag: 14 +# UNKNOWN_A7-NEXT: Value: 3 +# UNKNOWN_A7-NEXT: TagName: atomic_abi +# 
UNKNOWN_A7-NEXT: Description: Atomic ABI is 3 +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } + +# A6C_A6S: BuildAttributes { +# A6C_A6S-NEXT: FormatVersion: 0x41 +# A6C_A6S-NEXT: Section 1 { +# A6C_A6S-NEXT: SectionLength: 17 +# A6C_A6S-NEXT: Vendor: riscv +# A6C_A6S-NEXT: Tag: Tag_File (0x1) +# A6C_A6S-NEXT: Size: 7 +# A6C_A6S-NEXT: FileAttributes { +# A6C_A6S-NEXT: Attribute { +# A6C_A6S-NEXT: Tag: 14 +# A6C_A6S-NEXT: Value: 1 +# A6C_A6S-NEXT: TagName: atomic_abi +# A6C_A6S-NEXT: Description: Atomic ABI is 1 +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } + +# A6S_A7: BuildAttributes { +# A6S_A7-NEXT: FormatVersion: 0x41 +# A6S_A7-NEXT: Section 1 { +# A6S_A7-NEXT: SectionLength: 17 +# A6S_A7-NEXT: Vendor: riscv +# A6S_A7-NEXT: Tag: Tag_File (0x1) +# A6S_A7-NEXT: Size: 7 +# A6S_A7-NEXT: FileAttributes { +# A6S_A7-NEXT: Attribute { +# A6S_A7-NEXT: Tag: 14 +# A6S_A7-NEXT: Value: 3 +# A6S_A7-NEXT: TagName: atomic_abi +# A6S_A7-NEXT: Description: Atomic ABI is 3 +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } + #--- unknown13.s .attribute 13, "0" #--- unknown13a.s diff --git a/lldb/bindings/interface/SBValueDocstrings.i b/lldb/bindings/interface/SBValueDocstrings.i index 6bab923e8b35a6..59fa807f5ec95c 100644 --- a/lldb/bindings/interface/SBValueDocstrings.i +++ b/lldb/bindings/interface/SBValueDocstrings.i @@ -135,6 +135,26 @@ linked list." %feature("docstring", "Expands nested expressions like .a->b[0].c[1]->d." ) lldb::SBValue::GetValueForExpressionPath; +%feature("docstring", " + Return the value as an address. On failure, LLDB_INVALID_ADDRESS + will be returned. On architectures like AArch64, where the + top (unaddressable) bits can be used for authentication, + memory tagging, or top byte ignore, this method will return + the value with those top bits cleared. 
+ + GetValueAsUnsigned returns the actual value, with the + authentication/Top Byte Ignore/Memory Tagging Extension bits. + + Calling this on a random value which is not a pointer is + incorrect. Call GetType().IsPointerType() if in doubt. + + An SB API program may want to show both the literal byte value + and the address it refers to in memory. These two SBValue + methods allow SB API writers to behave appropriately for their + interface." +) lldb::SBValue::GetValueAsAddress; + + %feature("doctstring", " Returns the number for children. diff --git a/lldb/include/lldb/API/SBLineEntry.h b/lldb/include/lldb/API/SBLineEntry.h index 7c2431ba3c8a51..d70c4fac6ec717 100644 --- a/lldb/include/lldb/API/SBLineEntry.h +++ b/lldb/include/lldb/API/SBLineEntry.h @@ -29,6 +29,9 @@ class LLDB_API SBLineEntry { lldb::SBAddress GetEndAddress() const; + lldb::SBAddress + GetSameLineContiguousAddressRangeEnd(bool include_inlined_functions) const; + explicit operator bool() const; bool IsValid() const; diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 7da3335a7234b7..f1b5d1fb92ce29 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -562,6 +562,8 @@ class LLDB_API SBProcess { lldb::SBScriptObject GetScriptedImplementation(); + void GetStatus(SBStream &status); + protected: friend class SBAddress; friend class SBBreakpoint; diff --git a/lldb/include/lldb/API/SBSymbolContextList.h b/lldb/include/lldb/API/SBSymbolContextList.h index 4026afc213571c..95100d219df20f 100644 --- a/lldb/include/lldb/API/SBSymbolContextList.h +++ b/lldb/include/lldb/API/SBSymbolContextList.h @@ -44,6 +44,7 @@ class LLDB_API SBSymbolContextList { protected: friend class SBModule; friend class SBTarget; + friend class SBCompileUnit; lldb_private::SymbolContextList *operator->() const; diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index 823615e6a36df5..feeaa1cb71132b 100644 --- 
a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -879,6 +879,10 @@ class LLDB_API SBTarget { uint32_t count, const char *flavor_string); + lldb::SBInstructionList ReadInstructions(lldb::SBAddress start_addr, + lldb::SBAddress end_addr, + const char *flavor_string); + lldb::SBInstructionList GetInstructions(lldb::SBAddress base_addr, const void *buf, size_t size); diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h index 67f55ce7da2877..8f4c4fd56dfb18 100644 --- a/lldb/include/lldb/API/SBValue.h +++ b/lldb/include/lldb/API/SBValue.h @@ -68,6 +68,8 @@ class LLDB_API SBValue { uint64_t GetValueAsUnsigned(uint64_t fail_value = 0); + lldb::addr_t GetValueAsAddress(); + ValueType GetValueType(); // If you call this on a newly created ValueObject, it will always return diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 27a76a652f4063..5838281bcb1a10 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -811,23 +811,34 @@ def request_next(self, threadId): command_dict = {"command": "next", "type": "request", "arguments": args_dict} return self.send_recv(command_dict) - def request_stepIn(self, threadId): + def request_stepIn(self, threadId, targetId): if self.exit_status is not None: - raise ValueError("request_continue called after process exited") - args_dict = {"threadId": threadId} + raise ValueError("request_stepIn called after process exited") + args_dict = {"threadId": threadId, "targetId": targetId} command_dict = {"command": "stepIn", "type": "request", "arguments": args_dict} return self.send_recv(command_dict) + def request_stepInTargets(self, frameId): + if self.exit_status is not None: + raise ValueError("request_stepInTargets called after process exited") + args_dict = {"frameId": frameId} + command_dict = { + 
"command": "stepInTargets", + "type": "request", + "arguments": args_dict, + } + return self.send_recv(command_dict) + def request_stepOut(self, threadId): if self.exit_status is not None: - raise ValueError("request_continue called after process exited") + raise ValueError("request_stepOut called after process exited") args_dict = {"threadId": threadId} command_dict = {"command": "stepOut", "type": "request", "arguments": args_dict} return self.send_recv(command_dict) def request_pause(self, threadId=None): if self.exit_status is not None: - raise ValueError("request_continue called after process exited") + raise ValueError("request_pause called after process exited") if threadId is None: threadId = self.get_thread_id() args_dict = {"threadId": threadId} diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 23f650d2d36fdd..d56ea5dca14beb 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -218,8 +218,8 @@ def set_global(self, name, value, id=None): """Set a top level global variable only.""" return self.dap_server.request_setVariable(2, name, str(value), id=id) - def stepIn(self, threadId=None, waitForStop=True): - self.dap_server.request_stepIn(threadId=threadId) + def stepIn(self, threadId=None, targetId=None, waitForStop=True): + self.dap_server.request_stepIn(threadId=threadId, targetId=targetId) if waitForStop: return self.dap_server.wait_for_stopped() return None diff --git a/lldb/source/API/SBLineEntry.cpp b/lldb/source/API/SBLineEntry.cpp index 99a7b8fe644cb5..216ea6d18eab89 100644 --- a/lldb/source/API/SBLineEntry.cpp +++ b/lldb/source/API/SBLineEntry.cpp @@ -67,6 +67,21 @@ SBAddress SBLineEntry::GetEndAddress() const { return sb_address; } +SBAddress SBLineEntry::GetSameLineContiguousAddressRangeEnd( + bool include_inlined_functions) 
const { + LLDB_INSTRUMENT_VA(this); + + SBAddress sb_address; + if (m_opaque_up) { + AddressRange line_range = m_opaque_up->GetSameLineContiguousAddressRange( + include_inlined_functions); + + sb_address.SetAddress(line_range.GetBaseAddress()); + sb_address.OffsetAddress(line_range.GetByteSize()); + } + return sb_address; +} + bool SBLineEntry::IsValid() const { LLDB_INSTRUMENT_VA(this); return this->operator bool(); diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp index c73348fde3f74d..c37c111c5a58e0 100644 --- a/lldb/source/API/SBProcess.cpp +++ b/lldb/source/API/SBProcess.cpp @@ -928,6 +928,14 @@ size_t SBProcess::WriteMemory(addr_t addr, const void *src, size_t src_len, return bytes_written; } +void SBProcess::GetStatus(SBStream &status) { + LLDB_INSTRUMENT_VA(this, status); + + ProcessSP process_sp(GetSP()); + if (process_sp) + process_sp->GetStatus(status.ref()); +} + bool SBProcess::GetDescription(SBStream &description) { LLDB_INSTRUMENT_VA(this, description); diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 75f0444f629114..962ce9ba83cc77 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -2011,6 +2011,30 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr, return sb_instructions; } +lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress start_addr, + lldb::SBAddress end_addr, + const char *flavor_string) { + LLDB_INSTRUMENT_VA(this, start_addr, end_addr, flavor_string); + + SBInstructionList sb_instructions; + + TargetSP target_sp(GetSP()); + if (target_sp) { + lldb::addr_t start_load_addr = start_addr.GetLoadAddress(*this); + lldb::addr_t end_load_addr = end_addr.GetLoadAddress(*this); + if (end_load_addr > start_load_addr) { + lldb::addr_t size = end_load_addr - start_load_addr; + + AddressRange range(start_load_addr, size); + const bool force_live_memory = true; + sb_instructions.SetDisassembler(Disassembler::DisassembleRange( + 
target_sp->GetArchitecture(), nullptr, flavor_string, *target_sp, + range, force_live_memory)); + } + } + return sb_instructions; +} + lldb::SBInstructionList SBTarget::GetInstructions(lldb::SBAddress base_addr, const void *buf, size_t size) { diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index 94a8f3ea319e89..c53ec5a7464829 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -909,6 +909,25 @@ uint64_t SBValue::GetValueAsUnsigned(uint64_t fail_value) { return fail_value; } +lldb::addr_t SBValue::GetValueAsAddress() { + addr_t fail_value = LLDB_INVALID_ADDRESS; + ValueLocker locker; + lldb::ValueObjectSP value_sp(GetSP(locker)); + if (value_sp) { + bool success = true; + uint64_t ret_val = fail_value; + ret_val = value_sp->GetValueAsUnsigned(fail_value, &success); + if (!success) + return fail_value; + ProcessSP process_sp = m_opaque_sp->GetProcessSP(); + if (!process_sp) + return ret_val; + return process_sp->FixDataAddress(ret_val); + } + + return fail_value; +} + bool SBValue::MightHaveChildren() { LLDB_INSTRUMENT_VA(this); diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index d3fc487aed4333..9409497f1c81ba 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -1869,15 +1869,15 @@ AppleObjCRuntimeV2::DynamicClassInfoExtractor::ComputeHelper( if (loader->IsFullyInitialized()) { switch (exe_ctx.GetTargetRef().GetDynamicClassInfoHelper()) { case eDynamicClassInfoHelperAuto: - LLVM_FALLTHROUGH; + [[fallthrough]]; case eDynamicClassInfoHelperGetRealizedClassList: if (m_runtime.m_has_objc_getRealizedClassList_trylock) return DynamicClassInfoExtractor::objc_getRealizedClassList_trylock; - LLVM_FALLTHROUGH; + [[fallthrough]]; case 
eDynamicClassInfoHelperCopyRealizedClassList: if (m_runtime.m_has_objc_copyRealizedClassList) return DynamicClassInfoExtractor::objc_copyRealizedClassList; - LLVM_FALLTHROUGH; + [[fallthrough]]; case eDynamicClassInfoHelperRealizedClassesStruct: return DynamicClassInfoExtractor::gdb_objc_realized_classes; } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 3d37bb226a65fd..ae1a77e5be8321 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -2087,7 +2087,7 @@ void GDBRemoteCommunicationServerLLGS::AddProcessThreads( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qfThreadInfo( StringExtractorGDBRemote &packet) { - assert(m_debugged_processes.size() == 1 || + assert(m_debugged_processes.size() <= 1 || bool(m_extensions_supported & NativeProcessProtocol::Extension::multiprocess)); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 41d81fbcf1b087..12dafd3f5d5d51 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -495,6 +495,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_const_type: case DW_TAG_restrict_type: case DW_TAG_volatile_type: + case DW_TAG_LLVM_ptrauth_type: case DW_TAG_atomic_type: case DW_TAG_unspecified_type: { type_sp = ParseTypeModifier(sc, die, attrs); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 5da94adb771f86..8fc0f9103f5541 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -4861,7 +4861,7 
@@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, case clang::BuiltinType::Kind::OCLQueue: case clang::BuiltinType::Kind::OCLReserveID: case clang::BuiltinType::Kind::OCLSampler: - case clang::BuiltinType::Kind::OMPArraySection: + case clang::BuiltinType::Kind::ArraySection: case clang::BuiltinType::Kind::OMPArrayShaping: case clang::BuiltinType::Kind::OMPIterator: case clang::BuiltinType::Kind::Overload: @@ -6013,7 +6013,7 @@ uint32_t TypeSystemClang::GetNumPointeeChildren(clang::QualType type) { case clang::BuiltinType::ARCUnbridgedCast: case clang::BuiltinType::PseudoObject: case clang::BuiltinType::BuiltinFn: - case clang::BuiltinType::OMPArraySection: + case clang::BuiltinType::ArraySection: return 1; default: return 0; diff --git a/lldb/test/API/clear-sbvalue-nonaddressable-bits/Makefile b/lldb/test/API/clear-sbvalue-nonaddressable-bits/Makefile new file mode 100644 index 00000000000000..10495940055b63 --- /dev/null +++ b/lldb/test/API/clear-sbvalue-nonaddressable-bits/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/clear-sbvalue-nonaddressable-bits/TestClearSBValueNonAddressableBits.py b/lldb/test/API/clear-sbvalue-nonaddressable-bits/TestClearSBValueNonAddressableBits.py new file mode 100644 index 00000000000000..382b0e7a81d231 --- /dev/null +++ b/lldb/test/API/clear-sbvalue-nonaddressable-bits/TestClearSBValueNonAddressableBits.py @@ -0,0 +1,59 @@ +"""Test that SBValue clears non-addressable bits""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestClearSBValueNonAddressableBits(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + # On AArch64 systems, the top bits that are not used for + # addressing may be used for TBI, MTE, and/or pointer + # authentication. 
+ @skipIf(archs=no_match(["aarch64", "arm64", "arm64e"])) + + # Only run this test on systems where TBI is known to be + # enabled, so the address mask will clear the TBI bits. + @skipUnlessPlatform(["linux"] + lldbplatformutil.getDarwinOSTriples()) + def test(self): + self.source = "main.c" + self.build() + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "break here", lldb.SBFileSpec(self.source, False) + ) + + if self.TraceOn(): + self.runCmd("frame variable") + self.runCmd("frame variable &count &global") + + frame = thread.GetFrameAtIndex(0) + + count_p = frame.FindVariable("count_p") + count_invalid_p = frame.FindVariable("count_invalid_p") + self.assertEqual( + count_p.GetValueAsUnsigned(), count_invalid_p.GetValueAsAddress() + ) + self.assertNotEqual( + count_invalid_p.GetValueAsUnsigned(), count_invalid_p.GetValueAsAddress() + ) + self.assertEqual(5, count_p.Dereference().GetValueAsUnsigned()) + self.assertEqual(5, count_invalid_p.Dereference().GetValueAsUnsigned()) + + global_p = frame.FindVariable("global_p") + global_invalid_p = frame.FindVariable("global_invalid_p") + self.assertEqual( + global_p.GetValueAsUnsigned(), global_invalid_p.GetValueAsAddress() + ) + self.assertNotEqual( + global_invalid_p.GetValueAsUnsigned(), global_invalid_p.GetValueAsAddress() + ) + self.assertEqual(10, global_p.Dereference().GetValueAsUnsigned()) + self.assertEqual(10, global_invalid_p.Dereference().GetValueAsUnsigned()) + + main_p = frame.FindVariable("main_p") + main_invalid_p = frame.FindVariable("main_invalid_p") + self.assertEqual( + main_p.GetValueAsUnsigned(), main_invalid_p.GetValueAsAddress() + ) diff --git a/lldb/test/API/clear-sbvalue-nonaddressable-bits/main.c b/lldb/test/API/clear-sbvalue-nonaddressable-bits/main.c new file mode 100644 index 00000000000000..1b0e42c50dd678 --- /dev/null +++ b/lldb/test/API/clear-sbvalue-nonaddressable-bits/main.c @@ -0,0 +1,27 @@ +#include + +int global = 10; + +int main() { + int count = 5; + int 
*count_p = &count; + + // Add some metadata in the top byte (this will crash unless the + // test is running with TBI enabled, but we won't dereference it) + + intptr_t scratch = (intptr_t)count_p; + scratch |= (3ULL << 60); + int *count_invalid_p = (int *)scratch; + + int (*main_p)() = main; + scratch = (intptr_t)main_p; + scratch |= (3ULL << 60); + int (*main_invalid_p)() = (int (*)())scratch; + + int *global_p = &global; + scratch = (intptr_t)global_p; + scratch |= (3ULL << 60); + int *global_invalid_p = (int *)scratch; + + return count; // break here +} diff --git a/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py index ffa0dc943e0693..8f456aaf890c7f 100644 --- a/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py +++ b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py @@ -4,11 +4,23 @@ import dap_server import lldbdap_testcase +import psutil +from collections import deque from lldbsuite.test import lldbutil from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * +def get_subprocess(process_name): + queue = deque([psutil.Process(os.getpid())]) + while queue: + process = queue.popleft() + if process.name() == process_name: + return process + queue.extend(process.children()) + + self.assertTrue(False, "No subprocess with name %s found" % process_name) + class TestDAP_console(lldbdap_testcase.DAPTestCaseBase): def check_lldb_command( self, lldb_command, contains_string, assert_msg, command_escape_prefix="`" @@ -104,3 +116,49 @@ def test_empty_escape_prefix(self): "Help can be invoked", command_escape_prefix="", ) + + @skipIfWindows + @skipIfRemote + def test_exit_status_message_sigterm(self): + source = "main.cpp" + program = self.getBuildArtifact("a.out") + self.build_and_launch(program, commandEscapePrefix="") + breakpoint1_line = line_number(source, "// breakpoint 1") + breakpoint_ids = self.set_source_breakpoints(source, [breakpoint1_line]) + 
self.continue_to_breakpoints(breakpoint_ids) + + # Kill lldb-server process. + process_name = ( + "debugserver" if platform.system() in ["Darwin"] else "lldb-server" + ) + process = get_subprocess(process_name) + process.terminate() + process.wait() + + # Get the console output + console_output = self.collect_console(1.0) + + # Verify the exit status message is printed. + self.assertIn( + "exited with status = -1 (0xffffffff) debugserver died with signal SIGTERM", + console_output, + "Exit status does not contain message 'exited with status'", + ) + + @skipIfWindows + @skipIfRemote + def test_exit_status_message_ok(self): + source = "main.cpp" + program = self.getBuildArtifact("a.out") + self.build_and_launch(program, commandEscapePrefix="") + self.continue_to_exit() + + # Get the console output + console_output = self.collect_console(1.0) + + # Verify the exit status message is printed. + self.assertIn( + "exited with status = 0 (0x00000000)", + console_output, + "Exit status does not contain message 'exited with status'", + ) diff --git a/lldb/test/API/tools/lldb-dap/stepInTargets/Makefile b/lldb/test/API/tools/lldb-dap/stepInTargets/Makefile new file mode 100644 index 00000000000000..f772575cd5613b --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stepInTargets/Makefile @@ -0,0 +1,6 @@ + +ENABLE_THREADS := YES + +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/stepInTargets/TestDAP_stepInTargets.py b/lldb/test/API/tools/lldb-dap/stepInTargets/TestDAP_stepInTargets.py new file mode 100644 index 00000000000000..6296f6554d07e5 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stepInTargets/TestDAP_stepInTargets.py @@ -0,0 +1,68 @@ +""" +Test lldb-dap stepInTargets request +""" + +import dap_server +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +import lldbdap_testcase +from lldbsuite.test import lldbutil + + +class TestDAP_stepInTargets(lldbdap_testcase.DAPTestCaseBase): + @skipIf( + 
archs=no_match(["x86_64"]) + ) # InstructionControlFlowKind for ARM is not supported yet. + def test_basic(self): + """ + Tests the basic stepping in targets with directly calls. + """ + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.cpp" + + breakpoint_line = line_number(source, "// set breakpoint here") + lines = [breakpoint_line] + # Set breakpoint in the thread function so we can step the threads + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual( + len(breakpoint_ids), len(lines), "expect correct number of breakpoints" + ) + self.continue_to_breakpoints(breakpoint_ids) + + threads = self.dap_server.get_threads() + self.assertEqual(len(threads), 1, "expect one thread") + tid = threads[0]["id"] + + leaf_frame = self.dap_server.get_stackFrame() + self.assertIsNotNone(leaf_frame, "expect a leaf frame") + + # Request all step in targets list and verify the response. + step_in_targets_response = self.dap_server.request_stepInTargets( + leaf_frame["id"] + ) + self.assertEqual(step_in_targets_response["success"], True, "expect success") + self.assertIn( + "body", step_in_targets_response, "expect body field in response body" + ) + self.assertIn( + "targets", + step_in_targets_response["body"], + "expect targets field in response body", + ) + + step_in_targets = step_in_targets_response["body"]["targets"] + self.assertEqual(len(step_in_targets), 3, "expect 3 step in targets") + + # Verify the target names are correct. 
+ self.assertEqual(step_in_targets[0]["label"], "bar()", "expect bar()") + self.assertEqual(step_in_targets[1]["label"], "bar2()", "expect bar2()") + self.assertEqual( + step_in_targets[2]["label"], "foo(int, int)", "expect foo(int, int)" + ) + + # Choose to step into second target and verify that we are in bar2() + self.stepIn(threadId=tid, targetId=step_in_targets[1]["id"], waitForStop=True) + leaf_frame = self.dap_server.get_stackFrame() + self.assertIsNotNone(leaf_frame, "expect a leaf frame") + self.assertEqual(leaf_frame["name"], "bar2()") diff --git a/lldb/test/API/tools/lldb-dap/stepInTargets/main.cpp b/lldb/test/API/tools/lldb-dap/stepInTargets/main.cpp new file mode 100644 index 00000000000000..d3c3dbcc139ef0 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stepInTargets/main.cpp @@ -0,0 +1,11 @@ + +int foo(int val, int extra) { return val + extra; } + +int bar() { return 22; } + +int bar2() { return 54; } + +int main(int argc, char const *argv[]) { + foo(bar(), bar2()); // set breakpoint here + return 0; +} diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index 8015dec9ba8fe6..5c70a056fea4bf 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -162,6 +162,8 @@ struct DAP { std::vector exit_commands; std::vector stop_commands; std::vector terminate_commands; + // Map step in target id to list of function targets that user can choose. + llvm::DenseMap step_in_targets; // A copy of the last LaunchRequest or AttachRequest so we can reuse its // arguments if we get a RestartRequest. 
std::optional last_launch_or_attach_request; diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 25c5ad56e3d6fe..d0fbb9155715b1 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -50,6 +50,7 @@ #include #include +#include "lldb/API/SBStream.h" #include "lldb/Host/Config.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -503,6 +504,10 @@ void EventThreadFunction() { SendContinuedEvent(); break; case lldb::eStateExited: + lldb::SBStream stream; + process.GetStatus(stream); + g_dap.SendOutput(OutputType::Console, stream.GetData()); + // When restarting, we can get an "exited" event for the process we // just killed with the old PID, or even with no PID. In that case // we don't have to terminate the session. @@ -1645,7 +1650,7 @@ void request_initialize(const llvm::json::Object &request) { // The debug adapter supports the gotoTargetsRequest. body.try_emplace("supportsGotoTargetsRequest", false); // The debug adapter supports the stepInTargetsRequest. - body.try_emplace("supportsStepInTargetsRequest", false); + body.try_emplace("supportsStepInTargetsRequest", true); // The debug adapter supports the completions request. body.try_emplace("supportsCompletionsRequest", true); // The debug adapter supports the disassembly request. @@ -3180,14 +3185,155 @@ void request_stepIn(const llvm::json::Object &request) { llvm::json::Object response; FillResponse(request, response); auto arguments = request.getObject("arguments"); + + std::string step_in_target; + uint64_t target_id = GetUnsigned(arguments, "targetId", 0); + auto it = g_dap.step_in_targets.find(target_id); + if (it != g_dap.step_in_targets.end()) + step_in_target = it->second; + + const bool single_thread = GetBoolean(arguments, "singleThread", false); + lldb::RunMode run_mode = + single_thread ? 
lldb::eOnlyThisThread : lldb::eOnlyDuringStepping; lldb::SBThread thread = g_dap.GetLLDBThread(*arguments); if (thread.IsValid()) { // Remember the thread ID that caused the resume so we can set the // "threadCausedFocus" boolean value in the "stopped" events. g_dap.focus_tid = thread.GetThreadID(); - thread.StepInto(); + thread.StepInto(step_in_target.c_str(), run_mode); + } else { + response["success"] = llvm::json::Value(false); + } + g_dap.SendJSON(llvm::json::Value(std::move(response))); +} + +// "StepInTargetsRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "This request retrieves the possible step-in targets for +// the specified stack frame.\nThese targets can be used in the `stepIn` +// request.\nClients should only call this request if the corresponding +// capability `supportsStepInTargetsRequest` is true.", "properties": { +// "command": { +// "type": "string", +// "enum": [ "stepInTargets" ] +// }, +// "arguments": { +// "$ref": "#/definitions/StepInTargetsArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "StepInTargetsArguments": { +// "type": "object", +// "description": "Arguments for `stepInTargets` request.", +// "properties": { +// "frameId": { +// "type": "integer", +// "description": "The stack frame for which to retrieve the possible +// step-in targets." +// } +// }, +// "required": [ "frameId" ] +// }, +// "StepInTargetsResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to `stepInTargets` request.", +// "properties": { +// "body": { +// "type": "object", +// "properties": { +// "targets": { +// "type": "array", +// "items": { +// "$ref": "#/definitions/StepInTarget" +// }, +// "description": "The possible step-in targets of the specified +// source location." 
+// } +// }, +// "required": [ "targets" ] +// } +// }, +// "required": [ "body" ] +// }] +// } +void request_stepInTargets(const llvm::json::Object &request) { + llvm::json::Object response; + FillResponse(request, response); + auto arguments = request.getObject("arguments"); + + g_dap.step_in_targets.clear(); + lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments); + if (frame.IsValid()) { + lldb::SBAddress pc_addr = frame.GetPCAddress(); + lldb::SBAddress line_end_addr = + pc_addr.GetLineEntry().GetSameLineContiguousAddressRangeEnd(true); + lldb::SBInstructionList insts = g_dap.target.ReadInstructions( + pc_addr, line_end_addr, /*flavor_string=*/nullptr); + + if (!insts.IsValid()) { + response["success"] = false; + response["message"] = "Failed to get instructions for frame."; + g_dap.SendJSON(llvm::json::Value(std::move(response))); + return; + } + + llvm::json::Array step_in_targets; + const auto num_insts = insts.GetSize(); + for (size_t i = 0; i < num_insts; ++i) { + lldb::SBInstruction inst = insts.GetInstructionAtIndex(i); + if (!inst.IsValid()) + break; + + lldb::addr_t inst_addr = inst.GetAddress().GetLoadAddress(g_dap.target); + + // Note: currently only x86/x64 supports flow kind. + lldb::InstructionControlFlowKind flow_kind = + inst.GetControlFlowKind(g_dap.target); + if (flow_kind == lldb::eInstructionControlFlowKindCall) { + // Use call site instruction address as id which is easy to debug. + llvm::json::Object step_in_target; + step_in_target["id"] = inst_addr; + + llvm::StringRef call_operand_name = inst.GetOperands(g_dap.target); + lldb::addr_t call_target_addr; + if (call_operand_name.getAsInteger(0, call_target_addr)) + continue; + + lldb::SBAddress call_target_load_addr = + g_dap.target.ResolveLoadAddress(call_target_addr); + if (!call_target_load_addr.IsValid()) + continue; + + // The existing ThreadPlanStepInRange only accept step in target + // function with debug info. 
+ lldb::SBSymbolContext sc = g_dap.target.ResolveSymbolContextForAddress( + call_target_load_addr, lldb::eSymbolContextFunction); + + // The existing ThreadPlanStepInRange only accept step in target + // function with debug info. + std::string step_in_target_name; + if (sc.IsValid() && sc.GetFunction().IsValid()) + step_in_target_name = sc.GetFunction().GetDisplayName(); + + // Skip call sites if we fail to resolve its symbol name. + if (step_in_target_name.empty()) + continue; + + g_dap.step_in_targets.try_emplace(inst_addr, step_in_target_name); + step_in_target.try_emplace("label", step_in_target_name); + step_in_targets.emplace_back(std::move(step_in_target)); + } + } + llvm::json::Object body; + body.try_emplace("targets", std::move(step_in_targets)); + response.try_emplace("body", std::move(body)); } else { response["success"] = llvm::json::Value(false); + response["message"] = "Failed to get frame for input frameId."; } g_dap.SendJSON(llvm::json::Value(std::move(response))); } @@ -3904,6 +4050,7 @@ void RegisterRequestCallbacks() { g_dap.RegisterRequestCallback("source", request_source); g_dap.RegisterRequestCallback("stackTrace", request_stackTrace); g_dap.RegisterRequestCallback("stepIn", request_stepIn); + g_dap.RegisterRequestCallback("stepInTargets", request_stepInTargets); g_dap.RegisterRequestCallback("stepOut", request_stepOut); g_dap.RegisterRequestCallback("threads", request_threads); g_dap.RegisterRequestCallback("variables", request_variables); diff --git a/lldb/unittests/Host/linux/HostTest.cpp b/lldb/unittests/Host/linux/HostTest.cpp index 733909902474d7..8ecaf3ec0decb0 100644 --- a/lldb/unittests/Host/linux/HostTest.cpp +++ b/lldb/unittests/Host/linux/HostTest.cpp @@ -69,17 +69,21 @@ TEST_F(HostTest, GetProcessInfo) { EXPECT_EQ(HostInfo::GetArchitecture(HostInfo::eArchKindDefault), Info.GetArchitecture()); // Test timings - /* - * This is flaky in the buildbots on all archs + // In some sense this is a pretty trivial test. 
What it is trying to + // accomplish is just to validate that these values are never decreasing + // which would be unambiguously wrong. We can not reliably show them + // to be always increasing because the microsecond granularity means that, + // with hardware variations the number of loop iterations need to always + // be increasing for faster and faster machines. ASSERT_TRUE(Host::GetProcessInfo(getpid(), Info)); ProcessInstanceInfo::timespec user_time = Info.GetUserTime(); static volatile unsigned u = 0; for (unsigned i = 0; i < 10'000'000; i++) { - u = i; + u += i; } + ASSERT_TRUE(u > 0); ASSERT_TRUE(Host::GetProcessInfo(getpid(), Info)); ProcessInstanceInfo::timespec next_user_time = Info.GetUserTime(); - ASSERT_TRUE(user_time.tv_sec < next_user_time.tv_sec || - user_time.tv_usec < next_user_time.tv_usec); - */ + ASSERT_TRUE(user_time.tv_sec <= next_user_time.tv_sec || + user_time.tv_usec <= next_user_time.tv_usec); } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ff7e8f7be58c6c..a4f3fd099167a2 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14143,6 +14143,41 @@ Semantics: """""""""" See description of '``llvm.instrprof.increment``' intrinsic. +'``llvm.instrprof.callsite``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof.callsite(ptr , i64 , + i32 , + i32 , ptr ) + +Overview: +""""""""" + +.. FIXME: detail when it's emitted once the support is added + +The '``llvm.instrprof.callsite``' intrinsic should be emitted before a callsite +that's not to a "fake" callee (like another intrinsic or asm). + +Arguments: +"""""""""" +The first 4 arguments are similar to ``llvm.instrprof.increment``. The indexing +is specific to callsites, meaning callsites are indexed from 0, independent from +the indexes used by the other intrinsics (such as +``llvm.instrprof.increment[.step]``). + +The last argument is the called value of the callsite this intrinsic precedes. 
+ +Semantics: +"""""""""" +.. FIXME: detail how when the lowering pass is added. + +This is lowered by contextual profiling. + '``llvm.instrprof.timestamp``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 6ef6ec20da671d..1be8db602a15c4 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -112,6 +112,11 @@ Changes to the RISC-V Backend * The experimental Ssqosid extension is supported. * Zacas is no longer experimental. * Added the CSR names from the Resumable Non-Maskable Interrupts (Smrnmi) extension. +* The default atomics mapping was changed to emit an additional trailing fence + for sequentially consistent stores, offering compatibility with a future + mapping using load-acquire and store-release instructions while remaining + fully compatible with objects produced prior to this change. The mapping + (ABI) used is recorded as an ELF attribute. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 70d6b09a0c895c..e7c71041454557 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -999,6 +999,13 @@ END_TWO_BYTE_PACK() /// If Flags is not in a defined state then this has no effect. 
void intersectFlagsWith(const SDNodeFlags Flags); + bool hasPoisonGeneratingFlags() const { + SDNodeFlags Flags = getFlags(); + return Flags.hasNoUnsignedWrap() || Flags.hasNoSignedWrap() || + Flags.hasExact() || Flags.hasDisjoint() || Flags.hasNonNeg() || + Flags.hasNoNaNs() || Flags.hasNoInfs(); + } + void setCFIType(uint32_t Type) { CFIType = Type; } uint32_t getCFIType() const { return CFIType; } diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 60f41b30e91c24..cb514cde95b51a 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -46,6 +46,7 @@ typedef unsigned ID; class AssemblyAnnotationWriter; class Constant; +class ConstantRange; struct DenormalMode; class DISubprogram; enum LibFunc : unsigned; @@ -462,6 +463,9 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, /// attributes for the given arg. void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes); + /// adds the range attribute to the list of attributes for the return value. 
+ void addRangeRetAttr(const ConstantRange &CR); + MaybeAlign getParamAlign(unsigned ArgNo) const { return AttributeSets.getParamAlignment(ArgNo); } diff --git a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h index a3ebde709ae6e1..7525c9eb758bef 100644 --- a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h +++ b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h @@ -76,7 +76,7 @@ void GenericConvergenceVerifier::visit(const InstructionT &I) { "Entry intrinsic cannot be preceded by a convergent operation in the " "same basic block.", {Context.print(&I)}); - LLVM_FALLTHROUGH; + [[fallthrough]]; case CONV_ANCHOR: Check(!TokenDef, "Entry or anchor intrinsic cannot have a convergencectrl token " diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 01f24f70b15756..c3f97026c84dc2 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1497,6 +1497,7 @@ class InstrProfInstBase : public IntrinsicInst { case Intrinsic::instrprof_cover: case Intrinsic::instrprof_increment: case Intrinsic::instrprof_increment_step: + case Intrinsic::instrprof_callsite: case Intrinsic::instrprof_timestamp: case Intrinsic::instrprof_value_profile: return true; @@ -1581,6 +1582,21 @@ class InstrProfIncrementInstStep : public InstrProfIncrementInst { } }; +/// This represents the llvm.instrprof.callsite intrinsic. +/// It is structurally like the increment or step counters, hence the +/// inheritance relationship, albeit somewhat tenuous (it's not 'counting' per +/// se) +class InstrProfCallsite : public InstrProfCntrInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_callsite; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + Value *getCallee() const; +}; + /// This represents the llvm.instrprof.timestamp intrinsic. 
class InstrProfTimestampInst : public InstrProfCntrInstBase { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 9d7ab0fd2050bc..16f3f9d9fcc67e 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -914,6 +914,11 @@ def int_instrprof_increment_step : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty]>; +// Callsite instrumentation for contextual profiling +def int_instrprof_callsite : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; + // A timestamp for instrumentation based profiling. def int_instrprof_timestamp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty]>; diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h index 1309b17bff9c14..1ff029d44d376c 100644 --- a/llvm/include/llvm/MC/MCELFStreamer.h +++ b/llvm/include/llvm/MC/MCELFStreamer.h @@ -156,7 +156,7 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IsThumb, bool IsAndroid); + bool IsThumb, bool IsAndroid); } // end namespace llvm diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h index 47051447404d00..5038b87cd1dc90 100644 --- a/llvm/include/llvm/MC/TargetRegistry.h +++ b/llvm/include/llvm/MC/TargetRegistry.h @@ -92,39 +92,33 @@ createAsmStreamer(MCContext &Ctx, std::unique_ptr OS, MCStreamer *createELFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createGOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createMachOStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool DWARFMustBeAtTheEnd, + bool 
DWARFMustBeAtTheEnd, bool LabelSections = false); MCStreamer *createWasmStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createXCOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createSPIRVStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createDXContainerStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx); @@ -199,42 +193,42 @@ class Target { MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using GOFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using MachOStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool DWARFMustBeAtTheEnd); using COFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool IncrementalLinkerCompatible); using WasmStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using XCOFFStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using SPIRVStreamerCtorTy = 
MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); - + std::unique_ptr &&Emitter); + using DXContainerStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S); using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)( @@ -566,7 +560,7 @@ class Target { std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, + const MCSubtargetInfo &STI, bool, bool IncrementalLinkerCompatible, bool DWARFMustBeAtTheEnd) const { MCStreamer *S = nullptr; @@ -577,66 +571,63 @@ class Target { assert((T.isOSWindows() || T.isUEFI()) && "only Windows and UEFI COFF are supported"); S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, - IncrementalLinkerCompatible); + std::move(Emitter), IncrementalLinkerCompatible); break; case Triple::MachO: if (MachOStreamerCtorFn) S = MachOStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, - DWARFMustBeAtTheEnd); + std::move(Emitter), DWARFMustBeAtTheEnd); else S = createMachOStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, - DWARFMustBeAtTheEnd); + std::move(Emitter), DWARFMustBeAtTheEnd); break; case Triple::ELF: if (ELFStreamerCtorFn) S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createELFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::Wasm: if (WasmStreamerCtorFn) S = WasmStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), 
RelaxAll); + std::move(Emitter)); break; case Triple::GOFF: if (GOFFStreamerCtorFn) S = GOFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createGOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::XCOFF: if (XCOFFStreamerCtorFn) S = XCOFFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::SPIRV: if (SPIRVStreamerCtorFn) S = SPIRVStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::DXContainer: if (DXContainerStreamerCtorFn) S = DXContainerStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; } if (ObjectTargetStreamerCtorFn) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 4494d9b96189bc..1d457be93741f2 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -801,8 +801,9 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { } else if (EF.getHeader().e_machine == ELF::EM_RISCV) { if (Expected NameOrErr = getSymbolName(Sym)) { StringRef Name = *NameOrErr; - // Mark fake labels (used for label differences) and mapping symbols. - if (Name == ".L0 " || Name.starts_with("$d") || Name.starts_with("$x")) + // Mark empty name symbols (used for label differences) and mapping + // symbols. 
+ if (Name.empty() || Name.starts_with("$d") || Name.starts_with("$x")) Result |= SymbolRef::SF_FormatSpecific; } else { // TODO: Actually report errors helpfully. diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index a8e98f8bb13861..d378c3696f8d0b 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -44,6 +44,12 @@ enum class Meta : uint64_t { using MemProfSchema = llvm::SmallVector(Meta::Size)>; +// Returns the full schema currently in use. +MemProfSchema getFullSchema(); + +// Returns the schema consisting of the fields used for hot cold memory hinting. +MemProfSchema getHotColdSchema(); + // Holds the actual MemInfoBlock data with all fields. Contents may be read or // written partially by providing an appropriate schema to the serialize and // deserialize methods. @@ -116,22 +122,6 @@ struct PortableMemInfoBlock { void clear() { *this = PortableMemInfoBlock(); } - // Returns the full schema currently in use. - static MemProfSchema getFullSchema() { - MemProfSchema List; -#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - return List; - } - - // Returns the schema consisting of the fields currently consumed by the - // compiler. 
- static MemProfSchema getHotColdSchema() { - return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime, - Meta::TotalLifetimeAccessDensity}; - } - bool operator==(const PortableMemInfoBlock &Other) const { #define MIBEntryDef(NameTag, Name, Type) \ if (Other.get##Name() != get##Name()) \ diff --git a/llvm/include/llvm/Support/RISCVAttributeParser.h b/llvm/include/llvm/Support/RISCVAttributeParser.h index 305adffbe851e7..9f295504de959e 100644 --- a/llvm/include/llvm/Support/RISCVAttributeParser.h +++ b/llvm/include/llvm/Support/RISCVAttributeParser.h @@ -24,6 +24,7 @@ class RISCVAttributeParser : public ELFAttributeParser { Error unalignedAccess(unsigned tag); Error stackAlign(unsigned tag); + Error atomicAbi(unsigned tag); public: RISCVAttributeParser(ScopedPrinter *sw) diff --git a/llvm/include/llvm/Support/RISCVAttributes.h b/llvm/include/llvm/Support/RISCVAttributes.h index 18f5a84d21f250..5def890a727355 100644 --- a/llvm/include/llvm/Support/RISCVAttributes.h +++ b/llvm/include/llvm/Support/RISCVAttributes.h @@ -32,8 +32,21 @@ enum AttrType : unsigned { PRIV_SPEC = 8, PRIV_SPEC_MINOR = 10, PRIV_SPEC_REVISION = 12, + ATOMIC_ABI = 14, }; +namespace RISCVAtomicAbiTag { +enum AtomicABI : unsigned { + // Values for Tag_RISCV_atomic_abi + // Defined at + // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#tag_riscv_atomic_abi-14-uleb128version + UNKNOWN = 0, + A6C = 1, + A6S = 2, + A7 = 3, +}; +} // namespace RISCVAtomicAbiTag + enum { NOT_ALLOWED = 0, ALLOWED = 1 }; } // namespace RISCVAttrs diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index ea3520835fa07d..1684b424e3b442 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1979,6 +1979,7 @@ class Pattern resultInstrs> { list ResultInstrs = resultInstrs; list Predicates = []; // See class Instruction in Target.td. 
int AddedComplexity = 0; // See class Instruction in Target.td. + bit GISelShouldIgnore = 0; } // Pat - A simple (but common) form of a pattern, which produces a simple result diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index b1ba8e7c0f6014..f65515ca387229 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef UnderlyingObjects, namespace { struct DepDistanceStrideAndSizeInfo { const SCEV *Dist; - uint64_t Stride; + uint64_t StrideA; + uint64_t StrideB; uint64_t TypeByteSize; bool AIsWrite; bool BIsWrite; - DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride, - uint64_t TypeByteSize, bool AIsWrite, - bool BIsWrite) - : Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize), - AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} + DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA, + uint64_t StrideB, uint64_t TypeByteSize, + bool AIsWrite, bool BIsWrite) + : Dist(Dist), StrideA(StrideA), StrideB(StrideB), + TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; } // namespace -// Get the dependence distance, stride, type size and whether it is a write for +// Get the dependence distance, strides, type size and whether it is a write for // the dependence between A and B. Returns a DepType, if we can prove there's // no dependence or the analysis fails. Outlined to lambda to limit he scope // of various temporary variables, like A/BPtr, StrideA/BPtr and others. @@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize( InnermostLoop)) return MemoryDepChecker::Dependence::IndirectUnsafe; - // Need accesses with constant stride. We don't want to vectorize - // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap - // in the address space. 
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { + // Need accesses with constant strides and the same direction. We don't want + // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that + // could wrap in the address space. + if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || + (StrideAPtr < 0 && StrideBPtr > 0)) { LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return MemoryDepChecker::Dependence::Unknown; } @@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - uint64_t Stride = std::abs(StrideAPtr); - return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite, - BIsWrite); + return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), + std::abs(StrideBPtr), TypeByteSize, + AIsWrite, BIsWrite); } MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( @@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( if (std::holds_alternative(Res)) return std::get(Res); - const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] = + const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; + std::optional CommonStride = + StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt; + if (isa(Dist)) { + // TODO: Relax requirement that there is a common stride to retry with + // non-constant distance dependencies. + FoundNonConstantDistanceDependence |= !!CommonStride; + LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); + return Dependence::Unknown; + } + ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); + // If the distance between the acecsses is larger than their absolute stride // multiplied by the backedge taken count, the accesses are independet, i.e. 
// they are far enough appart that accesses won't access the same location // across all loop ierations. - if (!isa(Dist) && HasSameSize && + if (HasSameSize && CommonStride && isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist, - Stride, TypeByteSize)) + *CommonStride, TypeByteSize)) return Dependence::NoDep; const SCEVConstant *C = dyn_cast(Dist); - if (!C) { - LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); - FoundNonConstantDistanceDependence = true; - return Dependence::Unknown; - } - const APInt &Val = C->getAPInt(); - int64_t Distance = Val.getSExtValue(); - - // If the distance between accesses and their strides are known constants, - // check whether the accesses interlace each other. - if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize && - areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { - LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); - return Dependence::NoDep; + // Attempt to prove strided accesses independent. + if (C) { + const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + + // If the distance between accesses and their strides are known constants, + // check whether the accesses interlace each other. + if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 && + HasSameSize && + areStridedAccessesIndependent(std::abs(Distance), *CommonStride, + TypeByteSize)) { + LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); + return Dependence::NoDep; + } } // Negative distances are not plausible dependencies. - if (Val.isNegative()) { + if (SE.isKnownNonPositive(Dist)) { + if (SE.isKnownNonNegative(Dist)) { + if (HasSameSize) { + // Write to the same location with the same size. 
+ return Dependence::Forward; + } else { + LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " + "different type sizes\n"); + return Dependence::Unknown; + } + } + bool IsTrueDataDependence = (AIsWrite && !BIsWrite); // Check if the first access writes to a location that is read in a later // iteration, where the distance between them is not a multiple of a vector @@ -2071,27 +2095,40 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a // forward dependency will allow vectorization using any width. - if (IsTrueDataDependence && EnableForwardingConflictDetection && - (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(), - TypeByteSize))) { - LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); - return Dependence::ForwardButPreventsForwarding; + + if (IsTrueDataDependence && EnableForwardingConflictDetection) { + if (!C) { + // TODO: FoundNonConstantDistanceDependence is used as a necessary + // condition to consider retrying with runtime checks. Historically, we + // did not set it when strides were different but there is no inherent + // reason to. + FoundNonConstantDistanceDependence |= CommonStride.has_value(); + return Dependence::Unknown; + } + if (!HasSameSize || + couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(), + TypeByteSize)) { + LLVM_DEBUG( + dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); + return Dependence::ForwardButPreventsForwarding; + } } LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n"); return Dependence::Forward; } - // Write to the same location with the same size. 
- if (Val == 0) { - if (HasSameSize) - return Dependence::Forward; - LLVM_DEBUG( - dbgs() << "LAA: Zero dependence difference but different type sizes\n"); + if (!C) { + // TODO: FoundNonConstantDistanceDependence is used as a necessary condition + // to consider retrying with runtime checks. Historically, we did not set it + // when strides were different but there is no inherent reason to. + FoundNonConstantDistanceDependence |= CommonStride.has_value(); + LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); return Dependence::Unknown; } - assert(Val.isStrictlyPositive() && "Expect a positive value"); + if (!SE.isKnownPositive(Dist)) + return Dependence::Unknown; if (!HasSameSize) { LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " @@ -2099,6 +2136,14 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( return Dependence::Unknown; } + // The logic below currently only supports StrideA == StrideB, i.e. there's a + // common stride. + if (!CommonStride) + return Dependence::Unknown; + + const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); @@ -2134,7 +2179,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. 
uint64_t MinDistanceNeeded = - TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast(Distance)) { LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'); @@ -2183,7 +2228,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits // since there is a backwards dependency. - uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride); + uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride)); LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 22a766f8d62524..8eaf78157550ee 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1431,10 +1431,8 @@ static bool SinkCast(CastInst *CI) { if (!InsertedCast) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); assert(InsertPt != UserBB->end()); - InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), - CI->getType(), ""); + InsertedCast = cast(CI->clone()); InsertedCast->insertBefore(*UserBB, InsertPt); - InsertedCast->setDebugLoc(CI->getDebugLoc()); } // Replace a use of the cast with a use of the new cast. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index ad23d9d1835bb1..4c8be4d400e40c 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2223,7 +2223,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // addresses. 
We can treat it like a normal dbg_value intrinsic here; to // benefit from the full analysis of stack/SSA locations, GlobalISel would // need to register for and use the AssignmentTrackingAnalysis pass. - LLVM_FALLTHROUGH; + [[fallthrough]]; case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. const DbgValueInst &DI = cast(CI); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fd265b12d73ca4..aa746f1c7b7b3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7620,6 +7620,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) { static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { EVT VT = N0.getValueType(); + SDLoc DL(N); auto peekThroughResize = [](SDValue V) { if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE) @@ -7642,16 +7643,16 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, if (SDValue NotOperand = getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false)) { if (peekThroughResize(NotOperand) == N1Resized) - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1); + return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT), + N1); } // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) if (SDValue NotOperand = getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false)) { if (peekThroughResize(NotOperand) == N1Resized) - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1); + return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT), + N1); } } @@ -7659,13 +7660,13 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, // fold or (xor X, N1), N1 --> or X, N1 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1)))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, X, N1); + return DAG.getNode(ISD::OR, DL, VT, 
X, N1); // fold or (xor x, y), (x and/or y) --> or x, y if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) && (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) || sd_match(N1, m_Or(m_Specific(X), m_Specific(Y))))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Y); + return DAG.getNode(ISD::OR, DL, VT, X, Y); if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 659966b7ba55f5..761e72e6dce702 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1424,7 +1424,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // happened (such as an optimised function being always-inlined into an // optnone function). We will not be using the extra information in the // dbg.assign in that case, just use its dbg.value fields. - LLVM_FALLTHROUGH; + [[fallthrough]]; case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
const DbgValueInst *DI = cast(II); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 24f69ea1b742a6..8413cd60135f7f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3195,7 +3195,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::BITCAST: if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a4d293687a8739..7c5a280bdd1b9b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5128,6 +5128,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, if (VT.isScalableVector()) return true; + if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) + return true; + unsigned Opcode = Op.getOpcode(); switch (Opcode) { case ISD::FREEZE: @@ -5167,34 +5170,20 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return true; const TargetOptions &Options = getTarget().Options; - return Options.NoNaNsFPMath || Options.NoInfsFPMath || - (ConsiderFlags && - (Op->getFlags().hasNoNaNs() || Op->getFlags().hasNoInfs())); + return Options.NoNaNsFPMath || Options.NoInfsFPMath; } - // Matches hasPoisonGeneratingFlags(). + case ISD::OR: case ISD::ZERO_EXTEND: - return ConsiderFlags && Op->getFlags().hasNonNeg(); - case ISD::ADD: case ISD::SUB: case ISD::MUL: - // Matches hasPoisonGeneratingFlags(). - return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || - Op->getFlags().hasNoUnsignedWrap()); + // No poison except from flags (which is handled above) + return false; case ISD::SHL: // If the max shift amount isn't in range, then the shift can create poison. 
- if (!getValidMaximumShiftAmountConstant(Op, DemandedElts)) - return true; - - // Matches hasPoisonGeneratingFlags(). - return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || - Op->getFlags().hasNoUnsignedWrap()); - - // Matches hasPoisonGeneratingFlags(). - case ISD::OR: - return ConsiderFlags && Op->getFlags().hasDisjoint(); + return !getValidMaximumShiftAmountConstant(Op, DemandedElts); case ISD::SCALAR_TO_VECTOR: // Check if we demand any upper (undef) elements. diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 2c87b344083edb..bba3329e8cc269 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -16,6 +16,10 @@ #if defined(LLVM_ON_UNIX) && !defined(__ANDROID__) #include #include +#if defined(__MVS__) +#include "llvm/Support/BLAKE3.h" +#include +#endif #include #elif defined(_WIN32) #include @@ -239,6 +243,24 @@ void SharedMemoryMapper::reserve(size_t NumBytes, #if defined(LLVM_ON_UNIX) +#if defined(__MVS__) + ArrayRef Data( + reinterpret_cast(SharedMemoryName.c_str()), + SharedMemoryName.size()); + auto HashedName = BLAKE3::hash(Data); + key_t Key = *reinterpret_cast(HashedName.data()); + int SharedMemoryId = + shmget(Key, NumBytes, IPC_CREAT | __IPC_SHAREAS | 0700); + if (SharedMemoryId < 0) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } + LocalAddr = shmat(SharedMemoryId, nullptr, 0); + if (LocalAddr == reinterpret_cast(-1)) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } +#else int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR, 0700); if (SharedMemoryFile < 0) { return OnReserved(errorCodeToError(errnoAsErrorCode())); @@ -254,6 +276,7 @@ void SharedMemoryMapper::reserve(size_t NumBytes, } close(SharedMemoryFile); +#endif #elif defined(_WIN32) @@ -373,8 +396,13 @@ void SharedMemoryMapper::release(ArrayRef Bases, #if 
defined(LLVM_ON_UNIX) +#if defined(__MVS__) + if (shmdt(Reservations[Base].LocalAddr) < 0) + Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#else if (munmap(Reservations[Base].LocalAddr, Reservations[Base].Size) != 0) Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#endif #elif defined(_WIN32) @@ -415,7 +443,11 @@ SharedMemoryMapper::~SharedMemoryMapper() { #if defined(LLVM_ON_UNIX) && !defined(__ANDROID__) +#if defined(__MVS__) + shmdt(R.second.LocalAddr); +#else munmap(R.second.LocalAddr, R.second.Size); +#endif #elif defined(_WIN32) diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 6614beec760fb3..f5118c0f2bfa43 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -18,6 +18,10 @@ #include #include #include +#if defined(__MVS__) +#include "llvm/Support/BLAKE3.h" +#include +#endif #include #endif @@ -59,6 +63,21 @@ ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { SharedMemoryName = SharedMemoryNameStream.str(); } +#if defined(__MVS__) + ArrayRef Data( + reinterpret_cast(SharedMemoryName.c_str()), + SharedMemoryName.size()); + auto HashedName = BLAKE3::hash(Data); + key_t Key = *reinterpret_cast(HashedName.data()); + int SharedMemoryId = + shmget(Key, Size, IPC_CREAT | IPC_EXCL | __IPC_SHAREAS | 0700); + if (SharedMemoryId < 0) + return errorCodeToError(errnoAsErrorCode()); + + void *Addr = shmat(SharedMemoryId, nullptr, 0); + if (Addr == reinterpret_cast(-1)) + return errorCodeToError(errnoAsErrorCode()); +#else int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0700); if (SharedMemoryFile < 0) @@ -73,6 +92,7 @@ ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { return 
errorCodeToError(errnoAsErrorCode()); close(SharedMemoryFile); +#endif #elif defined(_WIN32) @@ -131,6 +151,9 @@ Expected ExecutorSharedMemoryMapperService::initialize( #if defined(LLVM_ON_UNIX) +#if defined(__MVS__) + // TODO Is it possible to change the protection level? +#else int NativeProt = 0; if ((Segment.RAG.Prot & MemProt::Read) == MemProt::Read) NativeProt |= PROT_READ; @@ -141,6 +164,7 @@ Expected ExecutorSharedMemoryMapperService::initialize( if (mprotect(Segment.Addr.toPtr(), Segment.Size, NativeProt)) return errorCodeToError(errnoAsErrorCode()); +#endif #elif defined(_WIN32) @@ -239,8 +263,15 @@ Error ExecutorSharedMemoryMapperService::release( #if defined(LLVM_ON_UNIX) +#if defined(__MVS__) + (void)Size; + + if (shmdt(Base.toPtr()) < 0) + Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#else if (munmap(Base.toPtr(), Size) != 0) Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#endif #elif defined(_WIN32) (void)Size; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index e66fe73425e863..545940dd86f90f 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -735,6 +735,10 @@ void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo, ArgNo, Bytes); } +void Function::addRangeRetAttr(const ConstantRange &CR) { + AttributeSets = AttributeSets.addRangeRetAttr(getContext(), CR); +} + DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const { if (&FPType == &APFloat::IEEEsingle()) { DenormalMode Mode = getDenormalModeF32Raw(); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 89403e1d7fcb4d..8faeb4e9951f74 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -291,6 +291,12 @@ Value *InstrProfIncrementInst::getStep() const { return ConstantInt::get(Type::getInt64Ty(Context), 1); } +Value *InstrProfCallsite::getCallee() const { + if (isa(this)) + return getArgOperand(4); + return nullptr; +} + 
std::optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = arg_size(); Metadata *MD = nullptr; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index bcca01ba7f3fdc..f28c157e035c7d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6308,7 +6308,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { break; } case Intrinsic::experimental_convergence_entry: - LLVM_FALLTHROUGH; case Intrinsic::experimental_convergence_anchor: break; case Intrinsic::experimental_convergence_loop: diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index b8ef2654ed6e3b..005521bad6e014 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -725,13 +725,7 @@ void ELFWriter::computeSymbolTable( HasLargeSectionIndex = true; } - // Temporary symbols generated for certain assembler features (.eh_frame, - // .debug_line) of an empty name may be referenced by relocations due to - // linker relaxation. Rename them to ".L0 " to match the gas fake label name - // and allow ld/objcopy --discard-locals to discard such symbols. 
StringRef Name = Symbol.getName(); - if (Name.empty()) - Name = ".L0 "; // Sections have their own string table if (Symbol.getType() != ELF::STT_SECTION) { diff --git a/llvm/lib/MC/MCDXContainerStreamer.cpp b/llvm/lib/MC/MCDXContainerStreamer.cpp index 3cb452f3dfa554..a596c9a16d498d 100644 --- a/llvm/lib/MC/MCDXContainerStreamer.cpp +++ b/llvm/lib/MC/MCDXContainerStreamer.cpp @@ -21,11 +21,8 @@ void MCDXContainerStreamer::emitInstToData(const MCInst &, MCStreamer *llvm::createDXContainerStreamer( MCContext &Context, std::unique_ptr &&MAB, - std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&OW, std::unique_ptr &&CE) { auto *S = new MCDXContainerStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index e541090769e9e5..23e926c3a9d14b 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -892,11 +892,8 @@ void MCELFStreamer::createAttributesSection( MCStreamer *llvm::createELFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCELFStreamer *S = new MCELFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp index 58d13c9f378853..2b6d5c8e75a706 100644 --- a/llvm/lib/MC/MCGOFFStreamer.cpp +++ b/llvm/lib/MC/MCGOFFStreamer.cpp @@ -24,11 +24,8 @@ MCGOFFStreamer::~MCGOFFStreamer() {} MCStreamer *llvm::createGOFFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCGOFFStreamer *S = new MCGOFFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git 
a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index d7d343f15eaa61..10f9988b9d16a0 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -564,7 +564,7 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool DWARFMustBeAtTheEnd, + bool DWARFMustBeAtTheEnd, bool LabelSections) { MCMachOStreamer *S = new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE), @@ -574,8 +574,6 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, Target, Context.getObjectFileInfo()->getSDKVersion(), Context.getObjectFileInfo()->getDarwinTargetVariantTriple(), Context.getObjectFileInfo()->getDarwinTargetVariantSDKVersion()); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 490e0a4dd40417..d2da5d0d3f90f2 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -34,6 +34,8 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, EmitEHFrame(true), EmitDebugFrame(false) { if (Assembler->getBackendPtr()) setAllowAutoPadding(Assembler->getBackend().allowAutoPadding()); + if (Context.getTargetOptions() && Context.getTargetOptions()->MCRelaxAll) + Assembler->setRelaxAll(true); } MCObjectStreamer::~MCObjectStreamer() = default; diff --git a/llvm/lib/MC/MCSPIRVStreamer.cpp b/llvm/lib/MC/MCSPIRVStreamer.cpp index 0bb73c7ff7ee2d..3b75a2e17a4a98 100644 --- a/llvm/lib/MC/MCSPIRVStreamer.cpp +++ b/llvm/lib/MC/MCSPIRVStreamer.cpp @@ -34,11 +34,8 @@ void MCSPIRVStreamer::emitInstToData(const MCInst &Inst, MCStreamer *llvm::createSPIRVStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCSPIRVStreamer *S = new MCSPIRVStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - 
S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp index fbab72fb5f3d36..c553ede77555a5 100644 --- a/llvm/lib/MC/MCWasmStreamer.cpp +++ b/llvm/lib/MC/MCWasmStreamer.cpp @@ -275,11 +275,8 @@ void MCWasmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, MCStreamer *llvm::createWasmStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCWasmStreamer *S = new MCWasmStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 458b4be6198387..175d7d6b6c31a6 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -162,12 +162,9 @@ void MCXCOFFStreamer::emitInstToData(const MCInst &Inst, MCStreamer *llvm::createXCOFFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCXCOFFStreamer *S = new MCXCOFFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 70ae57f77daef7..e1846fcbffee52 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -508,7 +508,7 @@ static Error writeMemProfV0( OS.write(0ULL); // Reserve space for the memprof frame payload offset. OS.write(0ULL); // Reserve space for the memprof frame table offset. - auto Schema = memprof::PortableMemInfoBlock::getFullSchema(); + auto Schema = memprof::getFullSchema(); writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = @@ -534,7 +534,7 @@ static Error writeMemProfV1( OS.write(0ULL); // Reserve space for the memprof frame payload offset. 
OS.write(0ULL); // Reserve space for the memprof frame table offset. - auto Schema = memprof::PortableMemInfoBlock::getFullSchema(); + auto Schema = memprof::getFullSchema(); writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = @@ -565,9 +565,9 @@ static Error writeMemProfV2( OS.write(0ULL); // Reserve space for the memprof call stack payload offset. OS.write(0ULL); // Reserve space for the memprof call stack table offset. - auto Schema = memprof::PortableMemInfoBlock::getHotColdSchema(); + auto Schema = memprof::getHotColdSchema(); if (MemProfFullSchema) - Schema = memprof::PortableMemInfoBlock::getFullSchema(); + Schema = memprof::getFullSchema(); writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 9a46d1151311f4..4667778ca11dd0 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -10,6 +10,19 @@ namespace llvm { namespace memprof { +MemProfSchema getFullSchema() { + MemProfSchema List; +#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return List; +} + +MemProfSchema getHotColdSchema() { + return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime, + Meta::TotalLifetimeAccessDensity}; +} + static size_t serializedSizeV0(const IndexedAllocationInfo &IAI, const MemProfSchema &Schema) { size_t Size = 0; diff --git a/llvm/lib/Support/RISCVAttributeParser.cpp b/llvm/lib/Support/RISCVAttributeParser.cpp index 7ce4b6ab161cd3..19c5a0e06903f6 100644 --- a/llvm/lib/Support/RISCVAttributeParser.cpp +++ b/llvm/lib/Support/RISCVAttributeParser.cpp @@ -36,7 +36,18 @@ const RISCVAttributeParser::DisplayHandler { RISCVAttrs::UNALIGNED_ACCESS, &RISCVAttributeParser::unalignedAccess, - }}; + }, + { + RISCVAttrs::ATOMIC_ABI, + &RISCVAttributeParser::atomicAbi, + }, +}; + +Error RISCVAttributeParser::atomicAbi(unsigned Tag) { + uint64_t Value = 
de.getULEB128(cursor); + printAttribute(Tag, Value, "Atomic ABI is " + utostr(Value)); + return Error::success(); +} Error RISCVAttributeParser::unalignedAccess(unsigned tag) { static const char *strings[] = {"No unaligned access", "Unaligned access"}; diff --git a/llvm/lib/Support/RISCVAttributes.cpp b/llvm/lib/Support/RISCVAttributes.cpp index 9e629760d3d842..dc70d65acba063 100644 --- a/llvm/lib/Support/RISCVAttributes.cpp +++ b/llvm/lib/Support/RISCVAttributes.cpp @@ -18,6 +18,7 @@ static constexpr TagNameItem tagData[] = { {PRIV_SPEC, "Tag_priv_spec"}, {PRIV_SPEC_MINOR, "Tag_priv_spec_minor"}, {PRIV_SPEC_REVISION, "Tag_priv_spec_revision"}, + {ATOMIC_ABI, "Tag_atomic_abi"}, }; constexpr TagNameMap RISCVAttributeTags{tagData}; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 419c141121c325..c86c98eed24f08 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1296,7 +1296,7 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, } case AArch64::LDPQpost: Imm = -Imm; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AArch64::STPQpre: { unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 223898e9d634d8..8e9782c1930c3c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18663,14 +18663,12 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (DCI.isBeforeLegalizeOps()) return SDValue(); - // Optimise concat_vectors of two [us]avgceils or [us]avgfloors with a 128-bit - // destination size, combine into an avg of two contacts of the source - // vectors. 
eg: concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), - // concat(b, d)) + // Optimise concat_vectors of two identical binops with a 128-bit destination + // size, combine into an binop of two contacts of the source vectors. eg: + // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d)) if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() && - (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS || - N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS) && - N0->hasOneUse() && N1->hasOneUse()) { + DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() && + N1->hasOneUse()) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); SDValue N10 = N1->getOperand(0); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index ad21f2673a6412..f5bea3336cbf73 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -306,13 +306,12 @@ llvm::createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, return new AArch64TargetAsmStreamer(S, OS); } -MCELFStreamer *llvm::createAArch64ELFStreamer( - MCContext &Context, std::unique_ptr TAB, - std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +llvm::createAArch64ELFStreamer(MCContext &Context, + std::unique_ptr TAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { AArch64ELFStreamer *S = new AArch64ELFStreamer( Context, std::move(TAB), std::move(OW), std::move(Emitter)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 25c609ee1496b3..e6df79ba19d4cf 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -20,8 +20,7 @@ namespace llvm { 
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); + std::unique_ptr Emitter); } #endif diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 043f0a03b79756..0dd4a78f962d41 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -378,30 +378,28 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createAArch64ELFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCStreamer *createMachOStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, - bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, DWARFMustBeAtTheEnd, + std::move(Emitter), DWARFMustBeAtTheEnd, /*LabelSections*/ true); } static MCStreamer * createWinCOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool IncrementalLinkerCompatible) { return createAArch64WinCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, + std::move(Emitter), IncrementalLinkerCompatible); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 438ac6cc47885e..c25cc2e99adcab 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -294,7 +294,7 @@ void 
AArch64TargetWinCOFFStreamer::emitARM64WinCFISaveAnyRegQPX(unsigned Reg, MCWinCOFFStreamer *llvm::createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IncrementalLinkerCompatible) { + bool IncrementalLinkerCompatible) { auto *S = new AArch64WinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), std::move(OW)); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h index 8c0656652eed2a..a13b1a451be5fd 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -21,7 +21,7 @@ namespace llvm { MCWinCOFFStreamer *createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IncrementalLinkerCompatible); + bool IncrementalLinkerCompatible); } // end llvm namespace #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2b81f5d51032dd..8abe9920c02c2d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1493,6 +1493,7 @@ def FeatureISAVersion11_Generic: FeatureSet< [FeatureMSAALoadDstSelBug, FeatureVALUTransUseHazard, FeatureUserSGPRInit16Bug, + FeatureMADIntraFwdBug, FeaturePrivEnabledTrap2NopBug, FeatureRequiresCOV6])>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index aa4ec785bf02a3..56345d14a331ca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2261,7 +2261,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_FCMP: if (!Subtarget.hasSALUFloatInsts()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::G_ICMP: case 
AMDGPU::G_UADDO: case AMDGPU::G_USUBO: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp index 1ce7012040daa4..4e9a33227a5dcb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp @@ -28,10 +28,11 @@ class AMDGPUELFStreamer : public MCELFStreamer { } -MCELFStreamer *llvm::createAMDGPUELFStreamer( - const Triple &T, MCContext &Context, std::unique_ptr MAB, - std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +llvm::createAMDGPUELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { return new AMDGPUELFStreamer(T, Context, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h index e09e2dca1b47af..f9ece5f22b0f7a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h @@ -26,8 +26,7 @@ class Triple; MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); + std::unique_ptr Emitter); } // namespace llvm. 
#endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 4700a984770bfb..30dd384051b940 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -112,10 +112,9 @@ static MCTargetStreamer *createAMDGPUNullTargetStreamer(MCStreamer &S) { static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createAMDGPUELFStreamer(T, Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } namespace { diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 028db9d17e300a..e54314cc7d00af 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -11104,7 +11104,7 @@ ARMAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, return Match_MnemonicFail; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: return Match_Success; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 1d80af590d16e0..afd7dccbeca9b3 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1487,8 +1487,7 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IsThumb, - bool IsAndroid) { + bool IsThumb, bool IsAndroid) { ARMELFStreamer *S = new ARMELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter), IsThumb, IsAndroid); @@ -1497,8 +1496,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. 
S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 8d9959a9457dbe..20603b6cf1b0bb 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -359,10 +359,9 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createARMELFStreamer( - Ctx, std::move(MAB), std::move(OW), std::move(Emitter), false, + Ctx, std::move(MAB), std::move(OW), std::move(Emitter), (T.getArch() == Triple::thumb || T.getArch() == Triple::thumbeb), T.isAndroid()); } @@ -370,10 +369,10 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, static MCStreamer * createARMMachOStreamer(MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, std::move(MAB), std::move(OW), - std::move(Emitter), false, DWARFMustBeAtTheEnd); + std::move(Emitter), DWARFMustBeAtTheEnd); } static MCInstPrinter *createARMMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 3066d9ba6783b2..a673d590419ecc 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -94,7 +94,6 @@ MCStreamer *createARMWinCOFFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, - bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an ELF Mach-O object writer. 
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index cdd7f6fb715a70..0fcf6eb1a5abb5 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -66,11 +66,12 @@ void ARMWinCOFFStreamer::finishImpl() { } } -MCStreamer *llvm::createARMWinCOFFStreamer( - MCContext &Context, std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, - bool IncrementalLinkerCompatible) { +MCStreamer * +llvm::createARMWinCOFFStreamer(MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool IncrementalLinkerCompatible) { auto *S = new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), std::move(OW)); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index ba370261e284c5..119baff83dae46 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -72,10 +72,9 @@ static MCInstPrinter *createAVRMCInstPrinter(const Triple &T, static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCTargetStreamer * diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 44932383fb43e9..caf84701b999f0 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -50,13 +50,13 @@ static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, return 
createBPFMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } -static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, - std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { - return createELFStreamer(Ctx, std::move(MAB), std::move(OW), std::move(Emitter), - RelaxAll); +static MCStreamer * +createBPFMCStreamer(const Triple &T, MCContext &Ctx, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { + return createELFStreamer(Ctx, std::move(MAB), std::move(OW), + std::move(Emitter)); } static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp index 64f01cd1c9fa7c..c3403ade389c40 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp @@ -88,13 +88,10 @@ createCSKYObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { CSKYELFStreamer *S = new CSKYELFStreamer(Ctx, std::move(MAB), std::move(OW), std::move(Emitter)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index dc8328a6705da8..0a948402fb896a 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -385,8 +385,7 @@ createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return 
createHexagonELFStreamer(T, Context, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index 8f83c883e822e0..4a381c033b384d 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -63,13 +63,12 @@ createLanaiMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { if (!T.isOSBinFormatELF()) llvm_unreachable("OS not supported"); return createELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp index a6e15e09463d26..9e56333e5fd9b5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp @@ -82,11 +82,9 @@ namespace llvm { MCELFStreamer *createLoongArchELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll) { + std::unique_ptr MCE) { LoongArchELFStreamer *S = new LoongArchELFStreamer( C, std::move(MAB), std::move(MOW), std::move(MCE)); - S->getAssembler().setRelaxAll(RelaxAll); return S; } } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h index 220b54092c72a1..e220729d8923e2 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h @@ -25,7 +25,6 @@ class 
LoongArchTargetELFStreamer : public LoongArchTargetStreamer { MCELFStreamer *createLoongArchELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll); + std::unique_ptr MCE); } // end namespace llvm #endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index a4e6a09863e6a6..e40981f5b5cd57 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -193,10 +193,9 @@ namespace { MCStreamer *createLoongArchELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&MOW, - std::unique_ptr &&MCE, - bool RelaxAll) { + std::unique_ptr &&MCE) { return createLoongArchELFStreamer(Context, std::move(MAB), std::move(MOW), - std::move(MCE), RelaxAll); + std::move(MCE)); } } // end namespace diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 9843b6144343e3..e907e8d8a70022 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -112,10 +112,11 @@ void MipsELFStreamer::EmitMipsOptionRecords() { I->EmitMipsOptionRecord(); } -MCELFStreamer *llvm::createMipsELFStreamer( - MCContext &Context, std::unique_ptr MAB, - std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +llvm::createMipsELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { return new MipsELFStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index ac70e40d4dfe96..051806d2cfe8f5 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ 
b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -75,8 +75,7 @@ class MipsELFStreamer : public MCELFStreamer { MCELFStreamer *createMipsELFStreamer(MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); + std::unique_ptr Emitter); } // end namespace llvm #endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index a84ca8ccfb2d12..2722e34b3f6246 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -22,11 +22,10 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. -MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, - std::unique_ptr TAB, - std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); +MCELFStreamer * +createMipsNaClELFStreamer(MCContext &Context, std::unique_ptr TAB, + std::unique_ptr OW, + std::unique_ptr Emitter); } #endif diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index d38b89f9a1f258..499cbd873e299a 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -104,15 +104,14 @@ static MCInstPrinter *createMipsMCInstPrinter(const Triple &T, static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { MCStreamer *S; if (!T.isOSNaCl()) S = createMipsELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createMipsNaClELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); return S; } 
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 4ba0ae91e2f653..86194a9ebb61a2 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -259,15 +259,12 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { return Reg != Mips::SP && Reg != Mips::T8; } -MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, - std::unique_ptr TAB, - std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +createMipsNaClELFStreamer(MCContext &Context, std::unique_ptr TAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { MipsNaClELFStreamer *S = new MipsNaClELFStreamer( Context, std::move(TAB), std::move(OW), std::move(Emitter)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); // Set bundle-alignment as required by the NaCl ABI for the target. S->emitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index b849b7be7b7be8..241078b038735b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -203,15 +203,16 @@ static MCStreamer * createPPCELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll) { + std::unique_ptr &&Emitter) { return createPPCELFStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)); } -static MCStreamer *createPPCXCOFFStreamer( - const Triple &T, MCContext &Context, std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll) { +static MCStreamer * +createPPCXCOFFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { return createPPCXCOFFStreamer(Context, std::move(MAB), 
std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index cdf7c048a4bf11..ae7ce476fff222 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -197,11 +197,9 @@ namespace llvm { MCELFStreamer *createRISCVELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll) { + std::unique_ptr MCE) { RISCVELFStreamer *S = new RISCVELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)); - S->getAssembler().setRelaxAll(RelaxAll); return S; } } // namespace llvm diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index e8f29cd8449ba0..212d731889f1ae 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -75,7 +75,6 @@ class RISCVTargetELFStreamer : public RISCVTargetStreamer { MCELFStreamer *createRISCVELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll); + std::unique_ptr MCE); } #endif diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 12a69842ab4c37..691a5892ae827b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -332,10 +332,9 @@ namespace { MCStreamer *createRISCVELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&MOW, - std::unique_ptr &&MCE, - bool RelaxAll) { + std::unique_ptr &&MCE) { return createRISCVELFStreamer(Context, std::move(MAB), std::move(MOW), - std::move(MCE), RelaxAll); + std::move(MCE)); } } // end anonymous namespace diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 0f92e9ed6a64d3..adb17cec28c26f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -75,6 +75,13 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, auto &ISAInfo = *ParseResult; emitTextAttribute(RISCVAttrs::ARCH, ISAInfo->toString()); } + + if (STI.hasFeature(RISCV::FeatureStdExtA)) { + unsigned AtomicABITag = STI.hasFeature(RISCV::FeatureNoTrailingSeqCstFence) + ? RISCVAttrs::RISCVAtomicAbiTag::AtomicABI::A6C + : RISCVAttrs::RISCVAtomicAbiTag::AtomicABI::A6S; + emitAttribute(RISCVAttrs::ATOMIC_ABI, AtomicABITag); + } } // This part is for ascii assembly output diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index c3dc4ea53697c0..deb983528f323c 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1216,10 +1216,10 @@ foreach i = {1-31} in def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore", "true", "Enable save/restore.">; -def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence", - "EnableSeqCstTrailingFence", - "true", - "Enable trailing fence for seq-cst store.">; +def FeatureNoTrailingSeqCstFence : SubtargetFeature<"no-trailing-seq-cst-fence", + "EnableTrailingSeqCstFence", + "false", + "Disable trailing fence for seq-cst store.">; def FeatureUnalignedScalarMem : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6529ab7a84a133..769c465d56f984 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return SDValue(); uint64_t MulAmt = CNode->getZExtValue(); + // WARNING: The code below is 
knowingly incorrect with regards to undef semantics. + // We're adding additional uses of X here, and in principle, we should be freezing + // X before doing so. However, adding freeze here causes real regressions, and no + // other target properly freezes X in these cases either. + SDValue X = N->getOperand(0); + for (uint64_t Divisor : {3, 5, 9}) { if (MulAmt % Divisor != 0) continue; @@ -13428,7 +13434,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) { SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); @@ -13446,7 +13451,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (ScaleShift >= 1 && ScaleShift < 4) { unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, @@ -13466,7 +13470,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, unsigned TZ = llvm::countr_zero(C); if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); @@ -13481,7 +13484,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (ScaleShift >= 1 && ScaleShift < 4) { unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, Shift1, @@ -13495,11 +13497,11 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (isPowerOf2_64(MulAmt + Offset)) { SDLoc DL(N); SDValue Shift1 = - 
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT)); - SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, N->getOperand(0), + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Offset - 1), DL, VT), - N->getOperand(0)); + X); return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359); } } @@ -20190,7 +20192,7 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, if (isa(Inst) && isAcquireOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Acquire); - if (Subtarget.enableSeqCstTrailingFence() && isa(Inst) && + if (Subtarget.enableTrailingSeqCstFence() && isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); return nullptr; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 3d598dd6f708ef..3feb7ec347543c 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -262,6 +262,17 @@ struct DemandedFields { VLZeroness = true; } + // Make this the result of demanding both the fields in this and B. 
+ void doUnion(const DemandedFields &B) { + VLAny |= B.VLAny; + VLZeroness |= B.VLZeroness; + SEW = std::max(SEW, B.SEW); + LMUL |= B.LMUL; + SEWLMULRatio |= B.SEWLMULRatio; + TailPolicy |= B.TailPolicy; + MaskPolicy |= B.MaskPolicy; + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Support for debugging, callable in GDB: V->dump() LLVM_DUMP_METHOD void dump() const { @@ -1326,11 +1337,7 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, PHIOp += 2) { Register InReg = PHI->getOperand(PHIOp).getReg(); MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); - const BlockData &PBBInfo = BlockInfo[PBB->getNumber()]; - // If the exit from the predecessor has the VTYPE we are looking for - // we might be able to avoid a VSETVLI. - if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require)) - return true; + const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit; // We need the PHI input to the be the output of a VSET(I)VLI. MachineInstr *DefMI = MRI->getVRegDef(InReg); @@ -1340,8 +1347,13 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, // We found a VSET(I)VLI make sure it matches the output of the // predecessor block. VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (!DefInfo.hasSameAVL(PBBInfo.Exit) || - !DefInfo.hasSameVTYPE(PBBInfo.Exit)) + if (DefInfo != PBBExit) + return true; + + // Require has the same VL as PBBExit, so if the exit from the + // predecessor has the VTYPE we are looking for we might be able + // to avoid a VSETVLI. 
+ if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require)) return true; } @@ -1547,16 +1559,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { AvailableInfo, OldExit); } -static void doUnion(DemandedFields &A, DemandedFields B) { - A.VLAny |= B.VLAny; - A.VLZeroness |= B.VLZeroness; - A.SEW = std::max(A.SEW, B.SEW); - A.LMUL |= B.LMUL; - A.SEWLMULRatio |= B.SEWLMULRatio; - A.TailPolicy |= B.TailPolicy; - A.MaskPolicy |= B.MaskPolicy; -} - // Return true if we can mutate PrevMI to match MI without changing any the // fields which would be observed. static bool canMutatePriorConfig(const MachineInstr &PrevMI, @@ -1606,7 +1608,7 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) { for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { if (!isVectorConfigInstr(MI)) { - doUnion(Used, getDemanded(MI, MRI, ST)); + Used.doUnion(getDemanded(MI, MRI, ST)); if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 5c1f154efa9911..3efd09aeae879d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1633,8 +1633,230 @@ static bool isFMUL(unsigned Opc) { } } +bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const { +#define OPCODE_LMUL_CASE(OPC) \ + case RISCV::OPC##_M1: \ + case RISCV::OPC##_M2: \ + case RISCV::OPC##_M4: \ + case RISCV::OPC##_M8: \ + case RISCV::OPC##_MF2: \ + case RISCV::OPC##_MF4: \ + case RISCV::OPC##_MF8 + +#define OPCODE_LMUL_MASK_CASE(OPC) \ + case RISCV::OPC##_M1_MASK: \ + case RISCV::OPC##_M2_MASK: \ + case RISCV::OPC##_M4_MASK: \ + case RISCV::OPC##_M8_MASK: \ + case RISCV::OPC##_MF2_MASK: \ + case RISCV::OPC##_MF4_MASK: \ + case RISCV::OPC##_MF8_MASK + + unsigned Opcode = Inst.getOpcode(); + if (Invert) { + if (auto InvOpcode = 
getInverseOpcode(Opcode)) + Opcode = *InvOpcode; + else + return false; + } + + // clang-format off + switch (Opcode) { + default: + return false; + OPCODE_LMUL_CASE(PseudoVADD_VV): + OPCODE_LMUL_MASK_CASE(PseudoVADD_VV): + OPCODE_LMUL_CASE(PseudoVMUL_VV): + OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV): + return true; + } + // clang-format on + +#undef OPCODE_LMUL_MASK_CASE +#undef OPCODE_LMUL_CASE +} + +bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root, + const MachineInstr &Prev) const { + if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode())) + return false; + + assert(Root.getMF() == Prev.getMF()); + const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo(); + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + + // Make sure vtype operands are also the same. + const MCInstrDesc &Desc = get(Root.getOpcode()); + const uint64_t TSFlags = Desc.TSFlags; + + auto checkImmOperand = [&](unsigned OpIdx) { + return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm(); + }; + + auto checkRegOperand = [&](unsigned OpIdx) { + return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg(); + }; + + // PassThru + // TODO: Potentially we can loosen the condition to consider Root to be + // associable with Prev if Root has NoReg as passthru. In which case we + // also need to loosen the condition on vector policies between these. 
+ if (!checkRegOperand(1)) + return false; + + // SEW + if (RISCVII::hasSEWOp(TSFlags) && + !checkImmOperand(RISCVII::getSEWOpNum(Desc))) + return false; + + // Mask + if (RISCVII::usesMaskPolicy(TSFlags)) { + const MachineBasicBlock *MBB = Root.getParent(); + const MachineBasicBlock::const_reverse_iterator It1(&Root); + const MachineBasicBlock::const_reverse_iterator It2(&Prev); + Register MI1VReg; + + bool SeenMI2 = false; + for (auto End = MBB->rend(), It = It1; It != End; ++It) { + if (It == It2) { + SeenMI2 = true; + if (!MI1VReg.isValid()) + // There is no V0 def between Root and Prev; they're sharing the + // same V0. + break; + } + + if (It->modifiesRegister(RISCV::V0, TRI)) { + Register SrcReg = It->getOperand(1).getReg(); + // If it's not VReg it'll be more difficult to track its defs, so + // bailing out here just to be safe. + if (!SrcReg.isVirtual()) + return false; + + if (!MI1VReg.isValid()) { + // This is the V0 def for Root. + MI1VReg = SrcReg; + continue; + } + + // Some random mask updates. + if (!SeenMI2) + continue; + + // This is the V0 def for Prev; check if it's the same as that of + // Root. + if (MI1VReg != SrcReg) + return false; + else + break; + } + } + + // If we haven't encountered Prev, it's likely that this function was + // called in a wrong way (e.g. Root is before Prev). 
+ assert(SeenMI2 && "Prev is expected to appear before Root"); + } + + // Tail / Mask policies + if (RISCVII::hasVecPolicyOp(TSFlags) && + !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc))) + return false; + + // VL + if (RISCVII::hasVLOp(TSFlags)) { + unsigned OpIdx = RISCVII::getVLOpNum(Desc); + const MachineOperand &Op1 = Root.getOperand(OpIdx); + const MachineOperand &Op2 = Prev.getOperand(OpIdx); + if (Op1.getType() != Op2.getType()) + return false; + switch (Op1.getType()) { + case MachineOperand::MO_Register: + if (Op1.getReg() != Op2.getReg()) + return false; + break; + case MachineOperand::MO_Immediate: + if (Op1.getImm() != Op2.getImm()) + return false; + break; + default: + llvm_unreachable("Unrecognized VL operand type"); + } + } + + // Rounding modes + if (RISCVII::hasRoundModeOp(TSFlags) && + !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1)) + return false; + + return true; +} + +// Most of our RVV pseudos have passthru operand, so the real operands +// start from index = 2. +bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) && + "Expect the present of passthrough operand."); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg()); + + // If only one operand has the same or inverse opcode and it's the second + // source operand, the operands must be commuted. 
+ Commuted = !areRVVInstsReassociable(Inst, *MI1) && + areRVVInstsReassociable(Inst, *MI2); + if (Commuted) + std::swap(MI1, MI2); + + return areRVVInstsReassociable(Inst, *MI1) && + (isVectorAssociativeAndCommutative(*MI1) || + isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) && + hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); +} + +bool RISCVInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + if (!isVectorAssociativeAndCommutative(Inst) && + !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) + return TargetInstrInfo::hasReassociableOperands(Inst, MBB); + + const MachineOperand &Op1 = Inst.getOperand(2); + const MachineOperand &Op2 = Inst.getOperand(3); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && Op1.getReg().isVirtual()) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && Op2.getReg().isVirtual()) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And at least one operand must be defined in MBB. + return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB); +} + +void RISCVInstrInfo::getReassociateOperandIndices( + const MachineInstr &Root, unsigned Pattern, + std::array &OperandIndices) const { + TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices); + if (RISCV::getRVVMCOpcode(Root.getOpcode())) { + // Skip the passthrough operand, so increment all indices by one. 
+ for (unsigned I = 0; I < 5; ++I) + ++OperandIndices[I]; + } +} + bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const { + if (isVectorAssociativeAndCommutative(Inst) || + isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) + return hasReassociableVectorSibling(Inst, Commuted); + if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted)) return false; @@ -1654,6 +1876,9 @@ bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const { + if (isVectorAssociativeAndCommutative(Inst, Invert)) + return true; + unsigned Opc = Inst.getOpcode(); if (Invert) { auto InverseOpcode = getInverseOpcode(Opc); @@ -1706,6 +1931,38 @@ bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, std::optional RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { +#define RVV_OPC_LMUL_CASE(OPC, INV) \ + case RISCV::OPC##_M1: \ + return RISCV::INV##_M1; \ + case RISCV::OPC##_M2: \ + return RISCV::INV##_M2; \ + case RISCV::OPC##_M4: \ + return RISCV::INV##_M4; \ + case RISCV::OPC##_M8: \ + return RISCV::INV##_M8; \ + case RISCV::OPC##_MF2: \ + return RISCV::INV##_MF2; \ + case RISCV::OPC##_MF4: \ + return RISCV::INV##_MF4; \ + case RISCV::OPC##_MF8: \ + return RISCV::INV##_MF8 + +#define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \ + case RISCV::OPC##_M1_MASK: \ + return RISCV::INV##_M1_MASK; \ + case RISCV::OPC##_M2_MASK: \ + return RISCV::INV##_M2_MASK; \ + case RISCV::OPC##_M4_MASK: \ + return RISCV::INV##_M4_MASK; \ + case RISCV::OPC##_M8_MASK: \ + return RISCV::INV##_M8_MASK; \ + case RISCV::OPC##_MF2_MASK: \ + return RISCV::INV##_MF2_MASK; \ + case RISCV::OPC##_MF4_MASK: \ + return RISCV::INV##_MF4_MASK; \ + case RISCV::OPC##_MF8_MASK: \ + return RISCV::INV##_MF8_MASK + switch (Opcode) { default: return std::nullopt; @@ -1729,7 +1986,16 @@ RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { return RISCV::SUBW; case 
RISCV::SUBW: return RISCV::ADDW; + // clang-format off + RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV); + RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV); + RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV); + RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV); + // clang-format on } + +#undef RVV_OPC_LMUL_MASK_CASE +#undef RVV_OPC_LMUL_CASE } static bool canCombineFPFusedMultiply(const MachineInstr &Root, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 3b03d5efde6ef5..170f813eb10d7d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -266,6 +266,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const override; @@ -274,6 +277,10 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { std::optional getInverseOpcode(unsigned Opcode) const override; + void getReassociateOperandIndices( + const MachineInstr &Root, unsigned Pattern, + std::array &OperandIndices) const override; + ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; @@ -297,6 +304,13 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { private: unsigned getInstBundleLength(const MachineInstr &MI) const; + + bool isVectorAssociativeAndCommutative(const MachineInstr &MI, + bool Invert = false) const; + bool areRVVInstsReassociable(const MachineInstr &MI1, + const MachineInstr &MI2) const; + bool hasReassociableVectorSibling(const MachineInstr &Inst, + bool &Commuted) const; }; namespace RISCV { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index aac7dc444a2de3..aaf9c019aedfe8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ 
-1,4 +1,4 @@ -//===-- RISCVInstrInfoZvk.td - RISC-V 'Zvk' instructions -------*- tablegen -*-===// +//===-- RISCVInstrInfoZvk.td - RISC-V 'Zvk' instructions ---*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp index 78dfbf4ec9327a..74ebaa9d0c0047 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp @@ -53,9 +53,9 @@ static MCStreamer * createSPIRVMCStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll) { + std::unique_ptr &&Emitter) { return createSPIRVStreamer(Ctx, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 115f34fa7751da..2da4431cf077eb 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -9631,7 +9631,7 @@ SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, case 8: case 16: Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero); - LLVM_FALLTHROUGH; + [[fallthrough]]; case 32: case 64: Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op, diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index b7b5b2a97c59e2..8ea02bd2ad1ff0 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -18,24 +18,16 @@ using namespace llvm; std::optional 
WebAssembly::parseType(StringRef Type) { - // FIXME: can't use StringSwitch because wasm::ValType doesn't have a - // "invalid" value. - if (Type == "i32") - return wasm::ValType::I32; - if (Type == "i64") - return wasm::ValType::I64; - if (Type == "f32") - return wasm::ValType::F32; - if (Type == "f64") - return wasm::ValType::F64; - if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || Type == "i32x4" || - Type == "i64x2" || Type == "f32x4" || Type == "f64x2") - return wasm::ValType::V128; - if (Type == "funcref") - return wasm::ValType::FUNCREF; - if (Type == "externref") - return wasm::ValType::EXTERNREF; - return std::nullopt; + return llvm::StringSwitch>{Type} + .Case("i32", wasm::ValType::I32) + .Case("i64", wasm::ValType::I64) + .Case("f32", wasm::ValType::F32) + .Case("f64", wasm::ValType::F64) + .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2", + wasm::ValType::V128) + .Case("funcref", wasm::ValType::FUNCREF) + .Case("externref", wasm::ValType::EXTERNREF) + .Default(std::nullopt); } WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 437a7bd6ff6c4c..18ecca34943f6b 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -121,7 +121,6 @@ MCStreamer *createX86WinCOFFStreamer(MCContext &C, std::unique_ptr &&AB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an X86 Mach-O object writer. 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 36945d1f67468f..dac8bc1fb1be35 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -70,11 +70,9 @@ MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, std::unique_ptr &&AB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool IncrementalLinkerCompatible) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)); - S->getAssembler().setRelaxAll(RelaxAll); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; } diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 39cb3f2c2fe178..ea0b56b9a1339b 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -47,204 +47,8 @@ static const char *RISCVGImplications[] = { "i", "m", "a", "f", "d", "zicsr", "zifencei" }; -// NOTE: This table should be sorted alphabetically by extension name. 
-static const RISCVSupportedExtension SupportedExtensions[] = { - {"a", {2, 1}}, - {"c", {2, 0}}, - {"d", {2, 2}}, - {"e", {2, 0}}, - {"f", {2, 2}}, - {"h", {1, 0}}, - {"i", {2, 1}}, - {"m", {2, 0}}, - - {"shcounterenw", {1, 0}}, - {"shgatpa", {1, 0}}, - {"shtvala", {1, 0}}, - {"shvsatpa", {1, 0}}, - {"shvstvala", {1, 0}}, - {"shvstvecd", {1, 0}}, - {"smaia", {1, 0}}, - {"smepmp", {1, 0}}, - {"ssaia", {1, 0}}, - {"ssccptr", {1, 0}}, - {"sscofpmf", {1, 0}}, - {"sscounterenw", {1, 0}}, - {"ssstateen", {1, 0}}, - {"ssstrict", {1, 0}}, - {"sstc", {1, 0}}, - {"sstvala", {1, 0}}, - {"sstvecd", {1, 0}}, - {"ssu64xl", {1, 0}}, - {"svade", {1, 0}}, - {"svadu", {1, 0}}, - {"svbare", {1, 0}}, - {"svinval", {1, 0}}, - {"svnapot", {1, 0}}, - {"svpbmt", {1, 0}}, - - {"v", {1, 0}}, - - // vendor-defined ('X') extensions - {"xcvalu", {1, 0}}, - {"xcvbi", {1, 0}}, - {"xcvbitmanip", {1, 0}}, - {"xcvelw", {1, 0}}, - {"xcvmac", {1, 0}}, - {"xcvmem", {1, 0}}, - {"xcvsimd", {1, 0}}, - {"xsfcease", {1, 0}}, - {"xsfvcp", {1, 0}}, - {"xsfvfnrclipxfqf", {1, 0}}, - {"xsfvfwmaccqqq", {1, 0}}, - {"xsfvqmaccdod", {1, 0}}, - {"xsfvqmaccqoq", {1, 0}}, - {"xsifivecdiscarddlone", {1, 0}}, - {"xsifivecflushdlone", {1, 0}}, - {"xtheadba", {1, 0}}, - {"xtheadbb", {1, 0}}, - {"xtheadbs", {1, 0}}, - {"xtheadcmo", {1, 0}}, - {"xtheadcondmov", {1, 0}}, - {"xtheadfmemidx", {1, 0}}, - {"xtheadmac", {1, 0}}, - {"xtheadmemidx", {1, 0}}, - {"xtheadmempair", {1, 0}}, - {"xtheadsync", {1, 0}}, - {"xtheadvdot", {1, 0}}, - {"xventanacondops", {1, 0}}, - - {"za128rs", {1, 0}}, - {"za64rs", {1, 0}}, - {"zacas", {1, 0}}, - {"zama16b", {1, 0}}, - {"zawrs", {1, 0}}, - - {"zba", {1, 0}}, - {"zbb", {1, 0}}, - {"zbc", {1, 0}}, - {"zbkb", {1, 0}}, - {"zbkc", {1, 0}}, - {"zbkx", {1, 0}}, - {"zbs", {1, 0}}, - - {"zca", {1, 0}}, - {"zcb", {1, 0}}, - {"zcd", {1, 0}}, - {"zce", {1, 0}}, - {"zcf", {1, 0}}, - {"zcmop", {1, 0}}, - {"zcmp", {1, 0}}, - {"zcmt", {1, 0}}, - - {"zdinx", {1, 0}}, - - {"zfa", {1, 0}}, - {"zfh", {1, 0}}, 
- {"zfhmin", {1, 0}}, - {"zfinx", {1, 0}}, - - {"zhinx", {1, 0}}, - {"zhinxmin", {1, 0}}, - - {"zic64b", {1, 0}}, - {"zicbom", {1, 0}}, - {"zicbop", {1, 0}}, - {"zicboz", {1, 0}}, - {"ziccamoa", {1, 0}}, - {"ziccif", {1, 0}}, - {"zicclsm", {1, 0}}, - {"ziccrse", {1, 0}}, - {"zicntr", {2, 0}}, - {"zicond", {1, 0}}, - {"zicsr", {2, 0}}, - {"zifencei", {2, 0}}, - {"zihintntl", {1, 0}}, - {"zihintpause", {2, 0}}, - {"zihpm", {2, 0}}, - {"zimop", {1, 0}}, - - {"zk", {1, 0}}, - {"zkn", {1, 0}}, - {"zknd", {1, 0}}, - {"zkne", {1, 0}}, - {"zknh", {1, 0}}, - {"zkr", {1, 0}}, - {"zks", {1, 0}}, - {"zksed", {1, 0}}, - {"zksh", {1, 0}}, - {"zkt", {1, 0}}, - - {"zmmul", {1, 0}}, - - {"zvbb", {1, 0}}, - {"zvbc", {1, 0}}, - - {"zve32f", {1, 0}}, - {"zve32x", {1, 0}}, - {"zve64d", {1, 0}}, - {"zve64f", {1, 0}}, - {"zve64x", {1, 0}}, - - {"zvfh", {1, 0}}, - {"zvfhmin", {1, 0}}, - - // vector crypto - {"zvkb", {1, 0}}, - {"zvkg", {1, 0}}, - {"zvkn", {1, 0}}, - {"zvknc", {1, 0}}, - {"zvkned", {1, 0}}, - {"zvkng", {1, 0}}, - {"zvknha", {1, 0}}, - {"zvknhb", {1, 0}}, - {"zvks", {1, 0}}, - {"zvksc", {1, 0}}, - {"zvksed", {1, 0}}, - {"zvksg", {1, 0}}, - {"zvksh", {1, 0}}, - {"zvkt", {1, 0}}, - - {"zvl1024b", {1, 0}}, - {"zvl128b", {1, 0}}, - {"zvl16384b", {1, 0}}, - {"zvl2048b", {1, 0}}, - {"zvl256b", {1, 0}}, - {"zvl32768b", {1, 0}}, - {"zvl32b", {1, 0}}, - {"zvl4096b", {1, 0}}, - {"zvl512b", {1, 0}}, - {"zvl64b", {1, 0}}, - {"zvl65536b", {1, 0}}, - {"zvl8192b", {1, 0}}, -}; - -// NOTE: This table should be sorted alphabetically by extension name. 
-// clang-format off -static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { - {"smmpm", {0, 8}}, - {"smnpm", {0, 8}}, - {"ssnpm", {0, 8}}, - {"sspm", {0, 8}}, - {"ssqosid", {1, 0}}, - {"supm", {0, 8}}, - - {"zaamo", {0, 2}}, - {"zabha", {1, 0}}, - {"zalasr", {0, 1}}, - {"zalrsc", {0, 2}}, - - {"zfbfmin", {1, 0}}, - - {"zicfilp", {0, 4}}, - {"zicfiss", {0, 4}}, - - {"ztso", {0, 1}}, - - {"zvfbfmin", {1, 0}}, - {"zvfbfwma", {1, 0}}, -}; -// clang-format on +#define GET_SUPPORTED_EXTENSIONS +#include "llvm/TargetParser/RISCVTargetParserDef.inc" static constexpr RISCVProfile SupportedProfiles[] = { {"rvi20u32", "rv32i"}, @@ -1041,66 +845,6 @@ Error RISCVISAInfo::checkDependency() { return Error::success(); } -static const char *ImpliedExtsD[] = {"f"}; -static const char *ImpliedExtsF[] = {"zicsr"}; -static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"}; -static const char *ImpliedExtsXSfvcp[] = {"zve32x"}; -static const char *ImpliedExtsXSfvfnrclipxfqf[] = {"zve32f"}; -static const char *ImpliedExtsXSfvfwmaccqqq[] = {"zvfbfmin"}; -static const char *ImpliedExtsXSfvqmaccdod[] = {"zve32x"}; -static const char *ImpliedExtsXSfvqmaccqoq[] = {"zve32x"}; -static const char *ImpliedExtsXTHeadVdot[] = {"v"}; -static const char *ImpliedExtsZcb[] = {"zca"}; -static const char *ImpliedExtsZcd[] = {"d", "zca"}; -static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; -static const char *ImpliedExtsZcf[] = {"f", "zca"}; -static const char *ImpliedExtsZcmop[] = {"zca"}; -static const char *ImpliedExtsZcmp[] = {"zca"}; -static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"}; -static const char *ImpliedExtsZdinx[] = {"zfinx"}; -static const char *ImpliedExtsZfa[] = {"f"}; -static const char *ImpliedExtsZfbfmin[] = {"f"}; -static const char *ImpliedExtsZfh[] = {"zfhmin"}; -static const char *ImpliedExtsZfhmin[] = {"f"}; -static const char *ImpliedExtsZfinx[] = {"zicsr"}; -static const char *ImpliedExtsZhinx[] = {"zhinxmin"}; -static const char 
*ImpliedExtsZhinxmin[] = {"zfinx"}; -static const char *ImpliedExtsZicfiss[] = {"zicsr", "zimop"}; -static const char *ImpliedExtsZicntr[] = {"zicsr"}; -static const char *ImpliedExtsZihpm[] = {"zicsr"}; -static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; -static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", - "zkne", "zknd", "zknh"}; -static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; -static const char *ImpliedExtsZvbb[] = {"zvkb"}; -static const char *ImpliedExtsZve32f[] = {"zve32x", "f"}; -static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"}; -static const char *ImpliedExtsZve64d[] = {"zve64f", "d"}; -static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; -static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"}; -static const char *ImpliedExtsZvfbfmin[] = {"zve32f"}; -static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin", "zfbfmin"}; -static const char *ImpliedExtsZvfh[] = {"zvfhmin", "zfhmin"}; -static const char *ImpliedExtsZvfhmin[] = {"zve32f"}; -static const char *ImpliedExtsZvkn[] = {"zvkb", "zvkned", "zvknhb", "zvkt"}; -static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"}; -static const char *ImpliedExtsZvkng[] = {"zvkg", "zvkn"}; -static const char *ImpliedExtsZvknhb[] = {"zve64x"}; -static const char *ImpliedExtsZvks[] = {"zvkb", "zvksed", "zvksh", "zvkt"}; -static const char *ImpliedExtsZvksc[] = {"zvbc", "zvks"}; -static const char *ImpliedExtsZvksg[] = {"zvkg", "zvks"}; -static const char *ImpliedExtsZvl1024b[] = {"zvl512b"}; -static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; -static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"}; -static const char *ImpliedExtsZvl2048b[] = {"zvl1024b"}; -static const char *ImpliedExtsZvl256b[] = {"zvl128b"}; -static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"}; -static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"}; -static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; -static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; 
-static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; -static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; - struct ImpliedExtsEntry { StringLiteral Name; ArrayRef Exts; @@ -1112,67 +856,8 @@ struct ImpliedExtsEntry { bool operator<(StringRef Other) const { return Name < Other; } }; -// Note: The table needs to be sorted by name. -static constexpr ImpliedExtsEntry ImpliedExts[] = { - {{"d"}, {ImpliedExtsD}}, - {{"f"}, {ImpliedExtsF}}, - {{"v"}, {ImpliedExtsV}}, - {{"xsfvcp"}, {ImpliedExtsXSfvcp}}, - {{"xsfvfnrclipxfqf"}, {ImpliedExtsXSfvfnrclipxfqf}}, - {{"xsfvfwmaccqqq"}, {ImpliedExtsXSfvfwmaccqqq}}, - {{"xsfvqmaccdod"}, {ImpliedExtsXSfvqmaccdod}}, - {{"xsfvqmaccqoq"}, {ImpliedExtsXSfvqmaccqoq}}, - {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}}, - {{"zcb"}, {ImpliedExtsZcb}}, - {{"zcd"}, {ImpliedExtsZcd}}, - {{"zce"}, {ImpliedExtsZce}}, - {{"zcf"}, {ImpliedExtsZcf}}, - {{"zcmop"}, {ImpliedExtsZcmop}}, - {{"zcmp"}, {ImpliedExtsZcmp}}, - {{"zcmt"}, {ImpliedExtsZcmt}}, - {{"zdinx"}, {ImpliedExtsZdinx}}, - {{"zfa"}, {ImpliedExtsZfa}}, - {{"zfbfmin"}, {ImpliedExtsZfbfmin}}, - {{"zfh"}, {ImpliedExtsZfh}}, - {{"zfhmin"}, {ImpliedExtsZfhmin}}, - {{"zfinx"}, {ImpliedExtsZfinx}}, - {{"zhinx"}, {ImpliedExtsZhinx}}, - {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, - {{"zicfiss"}, {ImpliedExtsZicfiss}}, - {{"zicntr"}, {ImpliedExtsZicntr}}, - {{"zihpm"}, {ImpliedExtsZihpm}}, - {{"zk"}, {ImpliedExtsZk}}, - {{"zkn"}, {ImpliedExtsZkn}}, - {{"zks"}, {ImpliedExtsZks}}, - {{"zvbb"}, {ImpliedExtsZvbb}}, - {{"zve32f"}, {ImpliedExtsZve32f}}, - {{"zve32x"}, {ImpliedExtsZve32x}}, - {{"zve64d"}, {ImpliedExtsZve64d}}, - {{"zve64f"}, {ImpliedExtsZve64f}}, - {{"zve64x"}, {ImpliedExtsZve64x}}, - {{"zvfbfmin"}, {ImpliedExtsZvfbfmin}}, - {{"zvfbfwma"}, {ImpliedExtsZvfbfwma}}, - {{"zvfh"}, {ImpliedExtsZvfh}}, - {{"zvfhmin"}, {ImpliedExtsZvfhmin}}, - {{"zvkn"}, {ImpliedExtsZvkn}}, - {{"zvknc"}, {ImpliedExtsZvknc}}, - {{"zvkng"}, {ImpliedExtsZvkng}}, - {{"zvknhb"}, {ImpliedExtsZvknhb}}, - {{"zvks"}, 
{ImpliedExtsZvks}}, - {{"zvksc"}, {ImpliedExtsZvksc}}, - {{"zvksg"}, {ImpliedExtsZvksg}}, - {{"zvl1024b"}, {ImpliedExtsZvl1024b}}, - {{"zvl128b"}, {ImpliedExtsZvl128b}}, - {{"zvl16384b"}, {ImpliedExtsZvl16384b}}, - {{"zvl2048b"}, {ImpliedExtsZvl2048b}}, - {{"zvl256b"}, {ImpliedExtsZvl256b}}, - {{"zvl32768b"}, {ImpliedExtsZvl32768b}}, - {{"zvl4096b"}, {ImpliedExtsZvl4096b}}, - {{"zvl512b"}, {ImpliedExtsZvl512b}}, - {{"zvl64b"}, {ImpliedExtsZvl64b}}, - {{"zvl65536b"}, {ImpliedExtsZvl65536b}}, - {{"zvl8192b"}, {ImpliedExtsZvl8192b}}, -}; +#define GET_IMPLIED_EXTENSIONS +#include "llvm/TargetParser/RISCVTargetParserDef.inc" void RISCVISAInfo::updateImplication() { bool HasE = Exts.count("e") != 0; diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index f8920541e6fd64..e591a8e73b1c6f 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -281,32 +281,21 @@ static bool runIPSCCP( Function *F = I.first; const ValueLatticeElement &ReturnValue = I.second; - // If there is a known constant range for the return value, add !range - // metadata to the function's call sites. + // If there is a known constant range for the return value, add range + // attribute to the return value. if (ReturnValue.isConstantRange() && !ReturnValue.getConstantRange().isSingleElement()) { // Do not add range metadata if the return value may include undef. if (ReturnValue.isConstantRangeIncludingUndef()) continue; + // Do not touch existing attribute for now. + // TODO: We should be able to take the intersection of the existing + // attribute and the inferred range. + if (F->hasRetAttribute(Attribute::Range)) + continue; auto &CR = ReturnValue.getConstantRange(); - for (User *User : F->users()) { - auto *CB = dyn_cast(User); - if (!CB || CB->getCalledFunction() != F) - continue; - - // Do not touch existing metadata for now. - // TODO: We should be able to take the intersection of the existing - // metadata and the inferred range. 
- if (CB->getMetadata(LLVMContext::MD_range)) - continue; - - LLVMContext &Context = CB->getParent()->getContext(); - Metadata *RangeMD[] = { - ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())), - ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))}; - CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD)); - } + F->addRangeRetAttr(CR); continue; } if (F->getReturnType()->isVoidTy()) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 88b7e496897e1f..51ac77348ed9e3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2001,43 +2001,30 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, if (!GEP1) return nullptr; - if (GEP2) { - // (gep X, ...) - (gep X, ...) - // - // Avoid duplicating the arithmetic if there are more than one non-constant - // indices between the two GEPs and either GEP has a non-constant index and - // multiple users. If zero non-constant index, the result is a constant and - // there is no duplication. If one non-constant index, the result is an add - // or sub with a constant, which is no larger than the original code, and - // there's no duplicated arithmetic, even if either GEP has multiple - // users. If more than one non-constant indices combined, as long as the GEP - // with at least one non-constant index doesn't have multiple users, there - // is no duplication. - unsigned NumNonConstantIndices1 = GEP1->countNonConstantIndices(); - unsigned NumNonConstantIndices2 = GEP2->countNonConstantIndices(); - if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 && - ((NumNonConstantIndices1 > 0 && !GEP1->hasOneUse()) || - (NumNonConstantIndices2 > 0 && !GEP2->hasOneUse()))) { - return nullptr; - } - } + // To avoid duplicating the offset arithmetic, rewrite the GEP to use the + // computed offset. 
This may erase the original GEP, so be sure to cache the + // inbounds flag before emitting the offset. + // TODO: We should probably do this even if there is only one GEP. + bool RewriteGEPs = GEP2 != nullptr; // Emit the offset of the GEP and an intptr_t. - Value *Result = EmitGEPOffset(GEP1); + bool GEP1IsInBounds = GEP1->isInBounds(); + Value *Result = EmitGEPOffset(GEP1, RewriteGEPs); // If this is a single inbounds GEP and the original sub was nuw, // then the final multiplication is also nuw. if (auto *I = dyn_cast(Result)) - if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() && + if (IsNUW && !GEP2 && !Swapped && GEP1IsInBounds && I->getOpcode() == Instruction::Mul) I->setHasNoUnsignedWrap(); // If we have a 2nd GEP of the same base pointer, subtract the offsets. // If both GEPs are inbounds, then the subtract does not have signed overflow. if (GEP2) { - Value *Offset = EmitGEPOffset(GEP2); + bool GEP2IsInBounds = GEP2->isInBounds(); + Value *Offset = EmitGEPOffset(GEP2, RewriteGEPs); Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false, - GEP1->isInBounds() && GEP2->isInBounds()); + GEP1IsInBounds && GEP2IsInBounds); } // If we have p - gep(p, ...) then we have to negate the result. @@ -2781,6 +2768,16 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { propagateSelectFMF(NewSel, P == X); return NewSel; } + + // -(Cond ? X : C) --> Cond ? -X : -C + // -(Cond ? C : Y) --> Cond ? 
-C : -Y + if (match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) { + Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg"); + Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg"); + SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY); + propagateSelectFMF(NewSel, /*CommonOperand=*/true); + return NewSel; + } } // fneg (copysign x, y) -> copysign x, (fneg y) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 282badd4369330..58b2d8e9dec1c3 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2339,6 +2339,43 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel); } +// Canonicalization: +// gep T, (gep i8, base, C1), (Index + C2) into +// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index +static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, + GEPOperator *Src, + InstCombinerImpl &IC) { + if (GEP.getNumIndices() != 1) + return nullptr; + auto &DL = IC.getDataLayout(); + Value *Base; + const APInt *C1; + if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1)))) + return nullptr; + Value *VarIndex; + const APInt *C2; + Type *PtrTy = Src->getType()->getScalarType(); + unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy); + if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2)))) + return nullptr; + if (C1->getBitWidth() != IndexSizeInBits || + C2->getBitWidth() != IndexSizeInBits) + return nullptr; + Type *BaseType = GEP.getSourceElementType(); + if (isa(BaseType)) + return nullptr; + APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType)); + APInt NewOffset = TypeSize * *C2 + *C1; + if (NewOffset.isZero() || + (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) { + Value *GEPConst = + IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset)); + 
return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src) { // Combine Indices - If the source pointer to this getelementptr instruction @@ -2347,6 +2384,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; + if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this)) + return I; + // For constant GEPs, use a more general offset-based folding approach. Type *PtrTy = Src->getType()->getScalarType(); if (GEP.hasAllConstantIndices() && diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e5ef0333696d0f..66ee2fce8313e8 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2506,6 +2506,8 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy, bool Signed = false) { Type *srcTy = V->getType(); + if (srcTy == dstTy) + return V; size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); if (srcSizeInBits > 1 && dstSizeInBits == 1) diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index edddfb1b92402f..059900f357e64b 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate { // We don't need to check non-wrapping here because forward/backward // dependence wouldn't be valid if these weren't monotonic accesses. 
- auto *Dist = cast( + auto *Dist = dyn_cast( PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); + if (!Dist) + return false; const APInt &Val = Dist->getAPInt(); return Val == TypeByteSize * StrideLoad; } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3eda669eb8a726..4db72461c95e47 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7524,6 +7524,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu SI->getPointerAddressSpace())) && SI->getPointerOperand() == I; + // llvm.assume(false/undef) always triggers immediate UB. + if (auto *Assume = dyn_cast(Use)) { + // Ignore assume operand bundles. + if (I == Assume->getArgOperand(0)) + return true; + } + if (auto *CB = dyn_cast(Use)) { if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) return false; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a1a28076881cb5..0cd7bd77722260 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15072,11 +15072,16 @@ void BoUpSLP::computeMinimumValueSizes() { IsSignedCmp = NodeIdx < VectorizableTree.size() && any_of(VectorizableTree[NodeIdx]->UserTreeIndices, - [](const EdgeInfo &EI) { + [&](const EdgeInfo &EI) { return EI.UserTE->getOpcode() == Instruction::ICmp && - any_of(EI.UserTE->Scalars, [](Value *V) { + any_of(EI.UserTE->Scalars, [&](Value *V) { auto *IC = dyn_cast(V); - return IC && IC->isSigned(); + return IC && + (IC->isSigned() || + !isKnownNonNegative(IC->getOperand(0), + SimplifyQuery(*DL)) || + !isKnownNonNegative(IC->getOperand(1), + SimplifyQuery(*DL))); }); }); } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll index 
932129bbb957fa..5312c36e436a21 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll @@ -6,10 +6,9 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @forward_dep_known_safe_due_to_backedge_taken_count(ptr %A) { ; CHECK-LABEL: 'forward_dep_known_safe_due_to_backedge_taken_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: @@ -44,10 +43,9 @@ exit: define void @forward_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) { ; CHECK-LABEL: 'forward_dep_not_known_safe_due_to_backedge_taken_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll index 51755314896bb3..5f4c732dc19df0 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll @@ -8,10 +8,9 @@ declare void @llvm.assume(i1) define void @different_non_constant_strides_known_forward(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_forward' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: @@ -45,10 +44,9 @@ exit: define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: diff --git a/llvm/test/CodeGen/AArch64/concatbinop.ll b/llvm/test/CodeGen/AArch64/concatbinop.ll index a13e62e0612cc0..828182d18b38ce 100644 --- a/llvm/test/CodeGen/AArch64/concatbinop.ll +++ b/llvm/test/CodeGen/AArch64/concatbinop.ll @@ -5,9 +5,13 @@ define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_add: ; CHECK: // %bb.0: -; CHECK-NEXT: add v2.4h, v2.4h, v3.4h -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = add <4 x i16> %a, %b %y = add <4 x i16> %c, %d @@ -33,13 +37,9 @@ define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: concat_addtunc2: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn v3.4h, v3.4s -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: add v1.4h, v2.4h, v3.4h -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %at = trunc <4 x i32> %a to <4 x i16> %bt = trunc <4 x i32> %b to <4 x i16> @@ -54,9 +54,13 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_sub: ; 
CHECK: // %bb.0: -; CHECK-NEXT: sub v2.4h, v2.4h, v3.4h -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = sub <4 x i16> %a, %b %y = sub <4 x i16> %c, %d @@ -67,9 +71,13 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v2.4h, v2.4h, v3.4h -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = mul <4 x i16> %a, %b %y = mul <4 x i16> %c, %d @@ -80,9 +88,13 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_xor: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %x = xor <4 x i16> %a, %b %y = xor <4 x i16> %c, %d @@ -93,9 +105,13 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> define <8 x half> 
@concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: concat_fadd: ; CHECK: // %bb.0: -; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h -; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = fadd <4 x half> %a, %b %y = fadd <4 x half> %c, %d @@ -106,9 +122,13 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: concat_fmul: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul v2.4h, v2.4h, v3.4h -; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = fmul <4 x half> %a, %b %y = fmul <4 x half> %c, %d @@ -119,9 +139,13 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: concat_min: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm v2.4h, v2.4h, v3.4h -; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = call <4 x half> 
@llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) %y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d) @@ -146,21 +170,16 @@ define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture n ; CHECK-LABEL: signOf_neon: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: movi v0.8b, #1 +; CHECK-NEXT: movi v0.16b, #1 ; CHECK-NEXT: ldp q3, q4, [x1] ; CHECK-NEXT: cmhi v5.8h, v1.8h, v3.8h ; CHECK-NEXT: cmhi v6.8h, v2.8h, v4.8h ; CHECK-NEXT: cmhi v1.8h, v3.8h, v1.8h ; CHECK-NEXT: cmhi v2.8h, v4.8h, v2.8h -; CHECK-NEXT: xtn v3.8b, v5.8h -; CHECK-NEXT: xtn v4.8b, v6.8h -; CHECK-NEXT: xtn v1.8b, v1.8h -; CHECK-NEXT: xtn v2.8b, v2.8h -; CHECK-NEXT: and v3.8b, v3.8b, v0.8b -; CHECK-NEXT: and v4.8b, v4.8b, v0.8b -; CHECK-NEXT: orr v0.8b, v3.8b, v1.8b -; CHECK-NEXT: orr v1.8b, v4.8b, v2.8b -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v3.16b, v5.16b, v6.16b +; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b +; CHECK-NEXT: and v0.16b, v3.16b, v0.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %0 = load <8 x i16>, ptr %a, align 2 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index 3254c5ebe9c6b1..ab7cea8dfb7789 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -2825,10 +2825,11 @@ entry: define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { ; CHECK-SD-LABEL: add_pair_v2i16_v2i64_zext: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: uaddlv d0, v0.4s ; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ret @@ -3578,10 
+3579,11 @@ entry: define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) { ; CHECK-SD-LABEL: add_pair_v2i8_v2i64_zext: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff -; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: movi v2.2d, #0x0000ff000000ff ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: uaddlv d0, v0.4s ; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll index bfd18f1b52a51b..a3b6c283512f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll @@ -152,3 +152,57 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4 ret ptr addrspace(3) %load } + +; GCN-LABEL: {{^}}atomic_load_monotonic_f16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) { + %load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2 + %ret = bitcast half %load to i16 + ret i16 %ret +} + +; GCN-LABEL: {{^}}atomic_load_monotonic_f16_offset: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16 + %load = load atomic half, ptr addrspace(3) %gep monotonic, align 2 + %ret = bitcast half %load to i16 + ret i16 %ret +} + +; GCN-LABEL: 
{{^}}atomic_load_monotonic_bf16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) { + %load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2 + %ret = bitcast bfloat %load to i16 + ret i16 %ret +} + +; GCN-LABEL: {{^}}atomic_load_monotonic_bf16_offset: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16 + %load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2 + %ret = bitcast bfloat %load to i16 + ret i16 %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll index 71e24c1692c7f4..cd1e1fb1add473 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll @@ -101,3 +101,56 @@ define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val) ret void } +; GCN-LABEL: {{^}}atomic_store_monotonic_f16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = bitcast i16 %arg.val to half + store atomic half %val, ptr addrspace(3) %ptr monotonic, align 2 + ret void +} + +; GCN-LABEL: {{^}}atomic_store_monotonic_offset_f16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = 
bitcast i16 %arg.val to half + %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16 + store atomic half %val, ptr addrspace(3) %gep monotonic, align 2 + ret void +} + +; GCN-LABEL: {{^}}atomic_store_monotonic_bf16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = bitcast i16 %arg.val to bfloat + store atomic bfloat %val, ptr addrspace(3) %ptr monotonic, align 2 + ret void +} + +; GCN-LABEL: {{^}}atomic_store_monotonic_offset_bf16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = bitcast i16 %arg.val to bfloat + %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16 + store atomic bfloat %val, ptr addrspace(3) %gep monotonic, align 2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll index 06ba60518adc04..e44572985e6d2e 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll @@ -6741,6 +6741,81 @@ entry: ret void } +define amdgpu_kernel void @atomic_store_bf16_offset(bfloat %in, ptr %out) { +; GCN1-LABEL: atomic_store_bf16_offset: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GCN1-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: v_mov_b32_e32 v2, s0 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_store_bf16_offset: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN2-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN2-NEXT: 
s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: v_mov_b32_e32 v2, s0 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_store_bf16_offset: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN3-NEXT: s_load_dword s4, s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: v_mov_b32_e32 v2, s4 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr %out, i64 8 + store atomic bfloat %in, ptr %out seq_cst, align 2 + ret void +} + +define amdgpu_kernel void @atomic_store_bf16(bfloat %in, ptr %out) { +; GCN1-LABEL: atomic_store_bf16: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GCN1-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: v_mov_b32_e32 v2, s0 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_store_bf16: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN2-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: v_mov_b32_e32 v2, s0 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_store_bf16: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN3-NEXT: s_load_dword s4, s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: v_mov_b32_e32 v2, s4 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + store atomic bfloat %in, ptr %out seq_cst, align 2 + ret void +} + define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) { ; GCN1-LABEL: atomic_inc_i32_offset: ; GCN1: ; %bb.0: ; %entry @@ 
-7868,3 +7943,201 @@ entry: store i32 %val, ptr %out2 ret void } + +define amdgpu_kernel void @atomic_load_f16_offset(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_f16_offset: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: s_add_u32 s0, s0, 16 +; GCN1-NEXT: s_addc_u32 s1, s1, 0 +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_f16_offset: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: s_add_u32 s0, s0, 16 +; GCN2-NEXT: s_addc_u32 s1, s1, 0 +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_f16_offset: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] offset:16 glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %gep = getelementptr half, ptr %in, i64 8 + %val = load atomic half, ptr %gep seq_cst, align 2 + store half %val, ptr %out + ret void +} + +define amdgpu_kernel void @atomic_load_f16(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_f16: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], 
s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_f16: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_f16: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %val = load atomic half, ptr %in seq_cst, align 2 + store half %val, ptr %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16_offset(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_bf16_offset: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: s_add_u32 s0, s0, 16 +; GCN1-NEXT: s_addc_u32 s1, s1, 0 +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; 
GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_bf16_offset: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: s_add_u32 s0, s0, 16 +; GCN2-NEXT: s_addc_u32 s1, s1, 0 +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_bf16_offset: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] offset:16 glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr %in, i64 8 + %val = load atomic bfloat, ptr %gep seq_cst, align 2 + store bfloat %val, ptr %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_bf16: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_bf16: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_bf16: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %val = load atomic bfloat, ptr %in seq_cst, align 2 + store bfloat %val, ptr %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 674d7a3c5c9b44..dac3a3db7b450b 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -6216,6 +6216,81 @@ entry: ret void } +define amdgpu_kernel void @atomic_store_bf16_offset(bfloat %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_store_bf16_offset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dword s4, s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:16 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_store_bf16_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s2, 16 +; VI-NEXT: s_addc_u32 s1, s3, 0 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_store_bf16_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 
0x24 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: global_store_short v0, v1, s[2:3] offset:16 +; GFX9-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr addrspace(1) %out, i64 8 + store atomic bfloat %in, ptr addrspace(1) %gep seq_cst, align 2 + ret void +} + +define amdgpu_kernel void @atomic_store_bf16(bfloat %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_store_bf16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dword s4, s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_store_bf16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; VI-NEXT: s_load_dword s0, s[0:1], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_store_bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + store atomic bfloat %in, ptr addrspace(1) %out seq_cst, align 2 + ret void +} + define amdgpu_kernel void @atomic_inc_i32_offset(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: atomic_inc_i32_offset: ; SI: ; %bb.0: ; %entry @@ -6963,3 +7038,207 @@ entry: store i32 %val, ptr addrspace(1) %out2 ret void } + +define amdgpu_kernel void @atomic_load_f16_offset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_f16_offset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; 
SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_wbinvl1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_f16_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, s6 +; VI-NEXT: s_mov_b32 s3, s7 +; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_f16_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:16 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr half, ptr addrspace(1) %in, i64 8 + %val = load atomic half, ptr addrspace(1) %gep seq_cst, align 2 + store half %val, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @atomic_load_f16_negoffset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_f16_negoffset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 +; SI-NEXT: v_mov_b32_e32 v1, -1 +; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; 
SI-NEXT: buffer_wbinvl1 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_f16_negoffset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s0, 0xfffffe00 +; VI-NEXT: s_addc_u32 s1, s1, -1 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: flat_load_ushort v0, v[0:1] glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_f16_negoffset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:-512 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr half, ptr addrspace(1) %in, i64 -256 + %val = load atomic half, ptr addrspace(1) %gep seq_cst, align 2 + store half %val, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16_offset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_bf16_offset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_wbinvl1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_bf16_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 
s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, s6 +; VI-NEXT: s_mov_b32 s3, s7 +; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_bf16_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:16 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr addrspace(1) %in, i64 8 + %val = load atomic bfloat, ptr addrspace(1) %gep seq_cst, align 2 + store bfloat %val, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16_negoffset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_bf16_negoffset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 +; SI-NEXT: v_mov_b32_e32 v1, -1 +; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_wbinvl1 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_bf16_negoffset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s0, 0xfffffe00 +; VI-NEXT: s_addc_u32 s1, s1, -1 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: 
flat_load_ushort v0, v[0:1] glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_bf16_negoffset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:-512 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr addrspace(1) %in, i64 -256 + %val = load atomic bfloat, ptr addrspace(1) %gep seq_cst, align 2 + store bfloat %val, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll index 14bcc4f994f890..400298bcff4f97 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s ; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok. 
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 2d1fc21cda89b0..1586a133568b35 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1,26 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s -; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s -; 
RUN: llc -mtriple=riscv64 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 141d5ea4182892..61b5e50c6d52f2 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -129,7 +129,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64ZMMUL %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64MZMMUL %s -; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefixes=CHECK,RV64A %s +; RUN: llc -mtriple=riscv64 -mattr=+a,no-trailing-seq-cst-fence %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6C %s +; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6S %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefixes=CHECK,RV64F %s ; RUN: llc -mtriple=riscv64 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV64D %s ; RUN: llc -mtriple=riscv64 -mattr=+c %s -o - | FileCheck --check-prefixes=CHECK,RV64C %s @@ -516,3 +517,10 @@ define i32 @addi(i32 %a) { %1 = add i32 %a, 1 ret i32 %1 } + +define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { + %1 = load atomic i8, ptr %a seq_cst, align 1 + ret i8 %1 +; A6S: .attribute 14, 2 +; A6C: .attribute 14, 1 +} diff --git a/llvm/test/CodeGen/RISCV/fixups-diff.ll b/llvm/test/CodeGen/RISCV/fixups-diff.ll index 84a7d18ed15068..cc1c87b1fe377f 100644 --- 
a/llvm/test/CodeGen/RISCV/fixups-diff.ll +++ b/llvm/test/CodeGen/RISCV/fixups-diff.ll @@ -27,7 +27,7 @@ entry: ; CHECK: } ; CHECK: Section {{.*}} .rela.eh_frame { -; CHECK-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 +; CHECK-NEXT: 0x1C R_RISCV_32_PCREL 0x0 ; CHECK-NEXT: } !llvm.dbg.cu = !{!0} diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index c303690aadfff8..44db3c49db8c3b 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC +; RUN: llc -mtriple=riscv32 -mattr=+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC ; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC -; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC -; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC +; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv64 -mattr=+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s 
--check-prefixes=RV64,RV64-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC +; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC-TRAILING define i8 @load8(ptr %p) nounwind { ; RV32-NO-ATOMIC-LABEL: load8: diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 0745b59c06cc8d..817e2b7d0bd993 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -4,7 +4,9 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB +; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBNOZBS +; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbs -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBZBS define i64 @slliuw(i64 %a) nounwind { ; RV64I-LABEL: slliuw: @@ -2733,3 +2735,121 @@ define i64 @mul_neg8(i64 %a) { %c = mul i64 %a, -8 ret i64 %c } + +define i64 @bext_mul12(i32 %1, i32 %2) { +; RV64I-LABEL: bext_mul12: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: li a1, 12 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBANOZBB-LABEL: bext_mul12: +; RV64ZBANOZBB: # %bb.0: # %entry +; RV64ZBANOZBB-NEXT: srlw a0, a0, a1 +; RV64ZBANOZBB-NEXT: andi a0, a0, 1 +; RV64ZBANOZBB-NEXT: sh1add a0, a0, a0 +; RV64ZBANOZBB-NEXT: slli a0, a0, 2 +; RV64ZBANOZBB-NEXT: ret +; +; RV64ZBAZBBNOZBS-LABEL: bext_mul12: +; RV64ZBAZBBNOZBS: # %bb.0: # %entry +; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1 +; RV64ZBAZBBNOZBS-NEXT: sh1add a0, a0, a0 +; RV64ZBAZBBNOZBS-NEXT: slli a0, a0, 2 +; 
RV64ZBAZBBNOZBS-NEXT: ret +; +; RV64ZBAZBBZBS-LABEL: bext_mul12: +; RV64ZBAZBBZBS: # %bb.0: # %entry +; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: sh1add a0, a0, a0 +; RV64ZBAZBBZBS-NEXT: slli a0, a0, 2 +; RV64ZBAZBBZBS-NEXT: ret +entry: + %3 = lshr i32 %1, %2 + %4 = and i32 %3, 1 + %5 = zext nneg i32 %4 to i64 + %6 = mul i64 %5, 12 + ret i64 %6 +} + +define i64 @bext_mul45(i32 %1, i32 %2) { +; RV64I-LABEL: bext_mul45: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: li a1, 45 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBANOZBB-LABEL: bext_mul45: +; RV64ZBANOZBB: # %bb.0: # %entry +; RV64ZBANOZBB-NEXT: srlw a0, a0, a1 +; RV64ZBANOZBB-NEXT: andi a0, a0, 1 +; RV64ZBANOZBB-NEXT: sh2add a0, a0, a0 +; RV64ZBANOZBB-NEXT: sh3add a0, a0, a0 +; RV64ZBANOZBB-NEXT: ret +; +; RV64ZBAZBBNOZBS-LABEL: bext_mul45: +; RV64ZBAZBBNOZBS: # %bb.0: # %entry +; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1 +; RV64ZBAZBBNOZBS-NEXT: sh2add a0, a0, a0 +; RV64ZBAZBBNOZBS-NEXT: sh3add a0, a0, a0 +; RV64ZBAZBBNOZBS-NEXT: ret +; +; RV64ZBAZBBZBS-LABEL: bext_mul45: +; RV64ZBAZBBZBS: # %bb.0: # %entry +; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a0 +; RV64ZBAZBBZBS-NEXT: sh3add a0, a0, a0 +; RV64ZBAZBBZBS-NEXT: ret +entry: + %3 = lshr i32 %1, %2 + %4 = and i32 %3, 1 + %5 = zext nneg i32 %4 to i64 + %6 = mul i64 %5, 45 + ret i64 %6 +} + +define i64 @bext_mul132(i32 %1, i32 %2) { +; RV64I-LABEL: bext_mul132: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: li a1, 132 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBANOZBB-LABEL: bext_mul132: +; RV64ZBANOZBB: # %bb.0: # %entry +; RV64ZBANOZBB-NEXT: srlw a0, a0, a1 +; RV64ZBANOZBB-NEXT: andi a0, a0, 1 +; RV64ZBANOZBB-NEXT: slli a1, a0, 7 +; RV64ZBANOZBB-NEXT: sh2add a0, a0, a1 +; RV64ZBANOZBB-NEXT: ret +; +; RV64ZBAZBBNOZBS-LABEL: 
bext_mul132: +; RV64ZBAZBBNOZBS: # %bb.0: # %entry +; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1 +; RV64ZBAZBBNOZBS-NEXT: slli a1, a0, 7 +; RV64ZBAZBBNOZBS-NEXT: sh2add a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: ret +; +; RV64ZBAZBBZBS-LABEL: bext_mul132: +; RV64ZBAZBBZBS: # %bb.0: # %entry +; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: slli a1, a0, 7 +; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: ret +entry: + %3 = lshr i32 %1, %2 + %4 = and i32 %3, 1 + %5 = zext nneg i32 %4 to i64 + %6 = mul i64 %5, 132 + ret i64 %6 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll new file mode 100644 index 00000000000000..6435c1c14e061e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -0,0 +1,253 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr='+v' -O3 %s -o - | FileCheck %s + +declare @llvm.riscv.vadd.nxv1i8.nxv1i8( + , + , + , + i32) + +declare @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + , + , + , + , + i32, i32) + +declare @llvm.riscv.vsub.nxv1i8.nxv1i8( + , + , + , + i32) + +declare @llvm.riscv.vmul.nxv1i8.nxv1i8( + , + , + , + i32) + +define @simple_vadd_vv( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: simple_vadd_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vadd.vv v9, v8, v9 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %b, + i32 %2) + + ret %c +} + +define @simple_vadd_vsub_vv( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: simple_vadd_vsub_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 
+; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vsub.nxv1i8.nxv1i8( + undef, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %b, + i32 %2) + + ret %c +} + +define @simple_vmul_vv( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: simple_vmul_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v8, v8, v8 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + undef, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + undef, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + undef, + %0, + %b, + i32 %2) + + ret %c +} + +; With passthru and masks. +define @vadd_vv_passthru( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: vadd_vv_passthru: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vadd.vv v9, v8, v8 +; CHECK-NEXT: vadd.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %b, + i32 %2) + + ret %c +} + +define @vadd_vv_passthru_negative( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: vadd_vv_passthru_negative: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vadd.vv v9, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %1, 
+ %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %b, + i32 %2) + + ret %c +} + +define @vadd_vv_mask( %0, %1, i32 %2, %m) nounwind { +; CHECK-LABEL: vadd_vv_mask: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vadd.vv v9, v8, v8, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %1, + %m, + i32 %2, i32 1) + + %b = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %a, + %m, + i32 %2, i32 1) + + %c = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %b, + %m, + i32 %2, i32 1) + + ret %c +} + +define @vadd_vv_mask_negative( %0, %1, i32 %2, %m, %m2) nounwind { +; CHECK-LABEL: vadd_vv_mask_negative: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: vadd.vv v11, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vadd.vv v9, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %1, + %m, + i32 %2, i32 1) + + %b = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %a, + %m, + i32 %2, i32 1) + + %c = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %b, + %m2, + i32 %2, i32 1) + + ret %c +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 621445fb2dc5e4..4ff2fc7a5fff5d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -487,6 +487,54 @@ for.end: ; preds = %for.body, %entry ret void } +define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) { +; CHECK-LABEL: saxpy_vec_demanded_fields: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma +; CHECK-NEXT: beqz a3, .LBB9_2 +; CHECK-NEXT: .LBB9_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vse32.v v16, (a2) +; CHECK-NEXT: sub a0, a0, a3 +; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: bnez a3, .LBB9_1 +; CHECK-NEXT: .LBB9_2: # %for.end +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3) + %cmp.not13 = icmp eq i64 %0, 0 + br i1 %cmp.not13, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %1 = phi i64 [ %7, %for.body ], [ %0, %entry ] + %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ] + %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] + %2 = bitcast ptr %x.addr.015 to ptr + %3 = tail call @llvm.riscv.vle.nxv16f32.i64( undef, ptr %2, i64 %1) + %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1 + %4 = bitcast ptr %y.addr.014 to ptr + %5 = tail call @llvm.riscv.vle.nxv16f32.i64( undef, ptr %4, i64 %1) + %6 = tail call @llvm.riscv.vfmacc.nxv16f32.f32.i64( %5, float %a, %3, i64 7, i64 %1, i64 0) + tail call void @llvm.riscv.vse.nxv16f32.i64( %6, ptr %4, i64 %1) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1 + %sub = sub i64 %n.addr.016, %1 + %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 1, i64 2) + %cmp.not = icmp eq i64 %7, 0 + br i1 %cmp.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) declare @llvm.riscv.vle.nxv16f32.i64(, ptr nocapture, i64) 
declare @llvm.riscv.vfmacc.nxv16f32.f32.i64(, float, , i64, i64, i64) @@ -501,12 +549,12 @@ define @test_vsetvli_x0_x0(ptr %x, ptr %y, ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: andi a3, a3, 1 -; CHECK-NEXT: beqz a3, .LBB9_2 +; CHECK-NEXT: beqz a3, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vle16.v v10, (a1) ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwcvt.x.x.v v8, v10 -; CHECK-NEXT: .LBB9_2: # %if.end +; CHECK-NEXT: .LBB10_2: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: ret @@ -540,19 +588,19 @@ define @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: andi a4, a4, 1 -; CHECK-NEXT: beqz a4, .LBB10_2 +; CHECK-NEXT: beqz a4, .LBB11_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vle16.v v10, (a1) ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwadd.wv v9, v9, v10 -; CHECK-NEXT: .LBB10_2: # %if.end +; CHECK-NEXT: .LBB11_2: # %if.end ; CHECK-NEXT: andi a5, a5, 1 -; CHECK-NEXT: beqz a5, .LBB10_4 +; CHECK-NEXT: beqz a5, .LBB11_4 ; CHECK-NEXT: # %bb.3: # %if2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: vwadd.wv v9, v9, v10 -; CHECK-NEXT: .LBB10_4: # %if2.end +; CHECK-NEXT: .LBB11_4: # %if2.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: ret @@ -586,11 +634,11 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) { ; CHECK-LABEL: vlmax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; CHECK-NEXT: blez a0, .LBB11_3 +; CHECK-NEXT: blez a0, .LBB12_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: slli a4, a6, 3 -; CHECK-NEXT: .LBB11_2: # %for.body +; CHECK-NEXT: .LBB12_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle64.v v8, 
(a2) ; CHECK-NEXT: vle64.v v9, (a3) @@ -600,8 +648,8 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) { ; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: add a3, a3, a4 ; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: blt a5, a0, .LBB11_2 -; CHECK-NEXT: .LBB11_3: # %for.end +; CHECK-NEXT: blt a5, a0, .LBB12_2 +; CHECK-NEXT: .LBB12_3: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0) @@ -633,18 +681,18 @@ define void @vector_init_vlmax(i64 %N, ptr %c) { ; CHECK-LABEL: vector_init_vlmax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: blez a0, .LBB12_3 +; CHECK-NEXT: blez a0, .LBB13_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB12_2: # %for.body +; CHECK-NEXT: .LBB13_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: blt a3, a0, .LBB12_2 -; CHECK-NEXT: .LBB12_3: # %for.end +; CHECK-NEXT: blt a3, a0, .LBB13_2 +; CHECK-NEXT: .LBB13_3: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0) @@ -669,20 +717,20 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) { ; CHECK-LABEL: vector_init_vsetvli_N: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, ma -; CHECK-NEXT: blez a0, .LBB13_3 +; CHECK-NEXT: blez a0, .LBB14_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB13_2: # %for.body +; CHECK-NEXT: .LBB14_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: blt a3, a0, .LBB13_2 -; CHECK-NEXT: .LBB13_3: # %for.end +; 
CHECK-NEXT: blt a3, a0, .LBB14_2 +; CHECK-NEXT: .LBB14_3: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0) @@ -711,13 +759,13 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) { ; CHECK-NEXT: slli a4, a3, 3 ; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB14_1: # %for.body +; CHECK-NEXT: .LBB15_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: blt a2, a0, .LBB14_1 +; CHECK-NEXT: blt a2, a0, .LBB15_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret entry: @@ -745,13 +793,13 @@ define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB15_1: # %for.body +; CHECK-NEXT: .LBB16_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a1, a1, 32 -; CHECK-NEXT: blt a2, a0, .LBB15_1 +; CHECK-NEXT: blt a2, a0, .LBB16_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret entry: @@ -779,13 +827,13 @@ define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB16_1: # %for.body +; CHECK-NEXT: .LBB17_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a1, a1, 32 -; CHECK-NEXT: blt a2, a0, .LBB16_1 +; CHECK-NEXT: blt a2, a0, .LBB17_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret entry: @@ -861,10 +909,10 @@ define @compat_store_consistency(i1 %cond, %a, %b @@ -886,16 +934,16 @@ define @test_ratio_only_vmv_s_x(ptr %x, 
ptr %y, i1 %cond) nou ; CHECK-LABEL: test_ratio_only_vmv_s_x: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andi a2, a2, 1 -; CHECK-NEXT: beqz a2, .LBB20_2 +; CHECK-NEXT: beqz a2, .LBB21_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vwcvt.x.x.v v8, v9 -; CHECK-NEXT: j .LBB20_3 -; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: j .LBB21_3 +; CHECK-NEXT: .LBB21_2: ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: .LBB20_3: # %if.end +; CHECK-NEXT: .LBB21_3: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: ret @@ -918,16 +966,16 @@ define @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) no ; CHECK-LABEL: test_ratio_only_vmv_s_x2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andi a2, a2, 1 -; CHECK-NEXT: beqz a2, .LBB21_2 +; CHECK-NEXT: beqz a2, .LBB22_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: j .LBB21_3 -; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: j .LBB22_3 +; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vwcvt.x.x.v v8, v9 -; CHECK-NEXT: .LBB21_3: # %if.end +; CHECK-NEXT: .LBB22_3: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: ret @@ -953,13 +1001,13 @@ define void @pre_over_vle(ptr %A) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi a1, a0, 800 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: .LBB22_1: # %vector.body +; CHECK-NEXT: .LBB23_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf4 v9, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: addi a0, a0, 8 -; CHECK-NEXT: bne a0, a1, .LBB22_1 +; CHECK-NEXT: bne a0, a1, .LBB23_1 ; CHECK-NEXT: # %bb.2: # %exit ; CHECK-NEXT: ret entry: diff --git 
a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll index d28836d560377e..d6a1d8d6e1366f 100644 --- a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll +++ b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll @@ -18,21 +18,21 @@ ; RELOCS-BOTH: Section ({{.*}}) .rela.debug_frame { ; RELOCS-NORL-NEXT: 0x1C R_LARCH_32 .debug_frame 0x0 ; RELOCS-NORL-NEXT: 0x20 R_LARCH_64 .text 0x0 -; RELOCS-ENRL-NEXT: 0x1C R_LARCH_32 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x20 R_LARCH_64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x28 R_LARCH_ADD64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x28 R_LARCH_SUB64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x3F R_LARCH_ADD6 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x3F R_LARCH_SUB6 .L0 0x0 +; RELOCS-ENRL-NEXT: 0x1C R_LARCH_32 0x0 +; RELOCS-ENRL-NEXT: 0x20 R_LARCH_64 0x0 +; RELOCS-ENRL-NEXT: 0x28 R_LARCH_ADD64 0x0 +; RELOCS-ENRL-NEXT: 0x28 R_LARCH_SUB64 0x0 +; RELOCS-ENRL-NEXT: 0x3F R_LARCH_ADD6 0x0 +; RELOCS-ENRL-NEXT: 0x3F R_LARCH_SUB6 0x0 ; RELOCS-BOTH-NEXT: } ; RELOCS-BOTH: Section ({{.*}}) .rela.debug_line { ; RELOCS-BOTH-NEXT: 0x22 R_LARCH_32 .debug_line_str 0x0 ; RELOCS-BOTH-NEXT: 0x31 R_LARCH_32 .debug_line_str 0x2 ; RELOCS-BOTH-NEXT: 0x46 R_LARCH_32 .debug_line_str 0x1B ; RELOCS-NORL-NEXT: 0x4F R_LARCH_64 .text 0x0 -; RELOCS-ENRL-NEXT: 0x4F R_LARCH_64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x5F R_LARCH_ADD16 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x5F R_LARCH_SUB16 .L0 0x0 +; RELOCS-ENRL-NEXT: 0x4F R_LARCH_64 0x0 +; RELOCS-ENRL-NEXT: 0x5F R_LARCH_ADD16 0x0 +; RELOCS-ENRL-NEXT: 0x5F R_LARCH_SUB16 0x0 ; RELOCS-BOTH-NEXT: } ; RELOCS-BOTH-NEXT: ] diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll index 99594b5e01e955..e5de1713f4e00d 100644 --- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll +++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll @@ -6,14 +6,14 @@ ; Check that we actually have relocations, otherwise this is kind of pointless. 
; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_info { -; READOBJ-RELOCS: 0x1B R_RISCV_ADD32 .L0 0x0 -; READOBJ-RELOCS-NEXT: 0x1B R_RISCV_SUB32 .L0 0x0 +; READOBJ-RELOCS: 0x1B R_RISCV_ADD32 0x0 +; READOBJ-RELOCS-NEXT: 0x1B R_RISCV_SUB32 0x0 ; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_frame { -; READOBJ-RELOCS: 0x20 R_RISCV_ADD32 .L0 0x0 -; READOBJ-RELOCS-NEXT: 0x20 R_RISCV_SUB32 .L0 0x0 +; READOBJ-RELOCS: 0x20 R_RISCV_ADD32 0x0 +; READOBJ-RELOCS-NEXT: 0x20 R_RISCV_SUB32 0x0 ; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_line { -; READOBJ-RELOCS: 0x5A R_RISCV_ADD16 .L0 0x0 -; READOBJ-RELOCS-NEXT: 0x5A R_RISCV_SUB16 .L0 0x0 +; READOBJ-RELOCS: 0x5A R_RISCV_ADD16 0x0 +; READOBJ-RELOCS-NEXT: 0x5A R_RISCV_SUB16 0x0 ; Check that we can print the source, even with relocations. ; OBJDUMP-SOURCE: Disassembly of section .text: diff --git a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll index ffef0ec2340684..f655a7c0a7ef42 100644 --- a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll +++ b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll @@ -4,11 +4,11 @@ ; RUN: | FileCheck -check-prefix=RELAX-DWARFDUMP %s ; ; RELAX: Section ({{.*}}) .rela.eh_frame { -; RELAX-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 -; RELAX-NEXT: 0x30 R_RISCV_32_PCREL .L0 0x0 -; RELAX-NEXT: 0x44 R_RISCV_32_PCREL .L0 0x0 -; RELAX-NEXT: 0x48 R_RISCV_ADD32 .L0 0x0 -; RELAX-NEXT: 0x48 R_RISCV_SUB32 .L0 0x0 +; RELAX-NEXT: 0x1C R_RISCV_32_PCREL 0x0 +; RELAX-NEXT: 0x30 R_RISCV_32_PCREL 0x0 +; RELAX-NEXT: 0x44 R_RISCV_32_PCREL 0x0 +; RELAX-NEXT: 0x48 R_RISCV_ADD32 0x0 +; RELAX-NEXT: 0x48 R_RISCV_SUB32 0x0 ; RELAX-NEXT: } ; RELAX-DWARFDUMP-NOT: error: failed to compute relocation diff --git a/llvm/test/DebugInfo/Symbolize/ELF/riscv-temporary-symbol.s b/llvm/test/DebugInfo/Symbolize/ELF/riscv-empty-name-symbol.s similarity index 71% rename from llvm/test/DebugInfo/Symbolize/ELF/riscv-temporary-symbol.s rename to llvm/test/DebugInfo/Symbolize/ELF/riscv-empty-name-symbol.s index 
0b54f104ab953e..1e0fa8a3061830 100644 --- a/llvm/test/DebugInfo/Symbolize/ELF/riscv-temporary-symbol.s +++ b/llvm/test/DebugInfo/Symbolize/ELF/riscv-empty-name-symbol.s @@ -1,11 +1,10 @@ # REQUIRES: riscv-registered-target -## Ignore .L0 symbols that are generated by LLVM integrated assembler and GNU -## assembler for .debug_line/.eh_frame related assembler directives. +## Ignore empty name symbols. # RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYM -# SYM: 0000000000000004 0 NOTYPE LOCAL DEFAULT [[#]] .L0 {{$}} +# SYM: 0000000000000004 0 NOTYPE LOCAL DEFAULT [[#]] {{$}} # SYM: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [[#]] foo ## Make sure we test at an address larger than or equal to an empty name symbol. diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s index e7114e4d643c6f..a5038022dfe0c3 100644 --- a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s @@ -7,7 +7,7 @@ # the section start and section end. So that by relocating these symbol, the section length # can be calculated. 
# -# CHECK: Creating defined graph symbol for ELF symbol ".L0 " +# CHECK: Creating defined graph symbol for ELF symbol "" # CHECK: Creating defined graph symbol for ELF symbol "main" .text .globl main diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll new file mode 100644 index 00000000000000..39b2b6225d8b10 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -0,0 +1,3617 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test1( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx 
@llvm.x86.ssse3.phadd.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test88( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test87( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to 
<4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test86( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to 
<8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test85( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), 
align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test84( +; CHECK-SAME: 
<1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, 
i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test83( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call 
x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test82( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 
[[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test81( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: 
[[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test80( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: 
[[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test79( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; 
CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test78( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to 
x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test77( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to 
<8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test76( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> 
+ %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test75( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i32> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <2 x i1> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i32> [[TMP13]] to 
x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test74( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to 
<4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx 
%mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone + +define i64 @test73(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test73( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret 
i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone + +define i64 @test72(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test72_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; 
CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone + +define i64 @test71(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test71( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 
x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx [[TMP6]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to i64 + ret i64 %2 +} + +declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone + +define i64 @test70(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: 
[[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test70_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = 
extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone + +define i64 @test69(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test69( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone + +define i64 @test68(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test68( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx [[TMP6]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to i64 + ret i64 %2 +} + +declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone + +define i64 @test67(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test67( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone + +define i64 @test66(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test66( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to 
<4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test66_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test66_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: 
[[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test65( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> 
[[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test64( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x 
i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + 
%4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test63( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx [[TMP6]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) 
nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test62( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> 
[[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test61( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx 
@llvm.x86.mmx.psrl.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test60( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 
[[_MSPROP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx [[TMP6]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test59( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: 
[[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test58( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] 
= bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone 
+ +define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test56( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast 
x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test55( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + 
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test54( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test53( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: 
[[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test52( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: 
[[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test51( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: 
[[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test50( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test49( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP13:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> 
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP18]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test48( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx 
@llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test47( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx 
%mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test46( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to 
<4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test45( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: 
[[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test44( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx 
@llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test43( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b 
to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test42( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; 
CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test41( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: 
[[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test40( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx 
[[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test39( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; 
CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test38( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; 
CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test37( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; 
CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test36( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add 
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test35( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: 
[[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test34( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test33( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; 
CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test32( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> 
[[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP4]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 48 +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test31( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test30(<1 x 
i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test30( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast 
<8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test29( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to 
x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test28( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test27( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] 
to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test26( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; 
CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind + +define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test25( +; CHECK-SAME: ptr [[P:%.*]], <1 x i64> [[A:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: 
tail call void @llvm.x86.mmx.movnt.dq(ptr [[P]], x86_mmx [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + tail call void @llvm.x86.mmx.movnt.dq(ptr %p, x86_mmx %mmx_var.i) nounwind + ret void +} + +declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone + +define i32 @test24(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test24( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP6]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx + %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind + ret i32 %1 +} + +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind + +define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test23( +; CHECK-SAME: <1 x i64> [[D:%.*]], <1 x i64> [[N:%.*]], ptr [[P:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: 
[[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[N]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[D]] to <8 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.x86.mmx.maskmovq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]], ptr [[P]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %0 = bitcast <1 x i64> %n to <8 x i8> + %1 = bitcast <1 x i64> %d to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, ptr %p) nounwind + ret void +} + +declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test22( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind 
readnone + +define i64 @test21(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test21( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i32 @test21_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test21_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP5]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <2 x i32> + %5 = extractelement <2 x i32> %4, i32 0 + ret i32 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test20( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: 
[[TMP7:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone + +define <2 x double> @test19(<1 x i64> %a) #0 { +; CHECK-LABEL: define <2 x double> @test19( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP7]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x 
i32> %0 to x86_mmx + %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone + ret <2 x double> %2 +} + +declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone + +define i64 @test18(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test18( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP0:%.*]] = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast x86_mmx %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone + +define i64 @test17(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test17( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] 
+; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP0:%.*]] = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast x86_mmx %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone + +define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test16( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]], i8 16) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone + +define i64 @test15(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test15( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = tail 
call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone + +define i64 @test14(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test14( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone + +define i64 @test13(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test13( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, 
ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test12( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test11( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x 
i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test10( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 
8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test9( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone + +define 
i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test8( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 
x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test7( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP18]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP21]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> 
[[TMP19]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP20]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test6( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = 
bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test5( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; 
CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test4( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: 
[[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test3( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x 
i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test2( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; 
CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind #0 { +; ALL-LABEL: test89: +; ALL: # %bb.0: +; ALL-NEXT: cvtpi2ps %mm0, %xmm0 +; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define <4 x float> @test89( +; CHECK-SAME: <4 x float> [[A:%.*]], x86_mmx [[B:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: 
[[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[C:%.*]] = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> [[A]], x86_mmx [[B]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[C]] +; + %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) + ret <4 x float> %c +} + +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone + +define void @test90() #0 { +; ALL-LABEL: test90: +; ALL: # %bb.0: +; ALL-NEXT: emms +; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define void @test90( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.mmx.emms() +; CHECK-NEXT: ret void +; + call void @llvm.x86.mmx.emms() + ret void +} + +declare void @llvm.x86.mmx.emms() + +define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 { +; CHECK-LABEL: define <1 x i64> @test_mm_insert_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]], i32 [[D:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A_COERCE]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label 
[[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx [[TMP0]], i32 [[D]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to <1 x i64> +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP2]] +; +entry: + %0 = bitcast <1 x i64> %a.coerce to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2) + %2 = bitcast x86_mmx %1 to <1 x i64> + ret <1 x i64> %2 +} + +declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) + +define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 { +; CHECK-LABEL: define i32 @test_mm_extract_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A_COERCE]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx [[TMP0]], i32 2) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = bitcast <1 x i64> %a.coerce to x86_mmx + %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2) + ret i32 %1 +} + +declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) + +attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vscale.ll b/llvm/test/Instrumentation/MemorySanitizer/vscale.ll new file mode 100644 index 00000000000000..b1c64188157077 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/vscale.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -S -msan-check-access-address=0 -passes="msan" 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test_load_store_i32(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_i32( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store [[_MSLD]], ptr [[TMP7]], align 16 +; CHECK-NEXT: store [[TMP1]], ptr [[B]], align 16 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + store %1, ptr %b + ret void +} + +define void @test_load_store_add_int(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_add_int( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 64 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 
[[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[B]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load , ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: store [[_MSLD1]], ptr [[TMP12]], align 64 +; CHECK-NEXT: store [[TMP5]], ptr [[B]], align 64 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + %2 = load , ptr %b + %3 = add %1, %2 + store %2, ptr %b + ret void +} + +define void @test_load_store_float(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_float( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store [[_MSLD]], ptr [[TMP7]], align 16 +; CHECK-NEXT: store [[TMP1]], ptr [[B]], align 16 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + store %1, ptr %b + ret void +} + +define void @test_load_store_add_float(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_add_float( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[B]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: store [[_MSLD1]], ptr [[TMP12]], align 8 +; CHECK-NEXT: store [[TMP5]], ptr [[B]], align 8 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + %2 = load , ptr %b + %3 = fadd %1, %2 + store %2, ptr %b + ret void +} diff --git a/llvm/test/MC/ELF/RISCV/gen-dwarf.s b/llvm/test/MC/ELF/RISCV/gen-dwarf.s index 34d02f5da44f29..342ed1cc0e7ef9 100644 --- a/llvm/test/MC/ELF/RISCV/gen-dwarf.s +++ b/llvm/test/MC/ELF/RISCV/gen-dwarf.s @@ -40,28 +40,28 @@ # CHECK-NEXT: 0x00000020: [DW_RLE_end_of_list ] # RELOC: Section ([[#]]) .rela.eh_frame { -# RELOC-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 -# RELOC-NEXT: 0x20 R_RISCV_ADD32 .L0 0x0 -# RELOC-NEXT: 0x20 R_RISCV_SUB32 .L0 0x0 -# RELOC-NEXT: 0x25 R_RISCV_SET6 .L0 0x0 -# RELOC-NEXT: 0x25 R_RISCV_SUB6 .L0 0x0 -# RELOC-NEXT: 0x34 R_RISCV_32_PCREL .L0 0x0 +# RELOC-NEXT: 0x1C R_RISCV_32_PCREL 0x0 +# RELOC-NEXT: 0x20 R_RISCV_ADD32 0x0 +# RELOC-NEXT: 0x20 R_RISCV_SUB32 0x0 +# RELOC-NEXT: 0x25 R_RISCV_SET6 0x0 +# RELOC-NEXT: 0x25 R_RISCV_SUB6 0x0 +# RELOC-NEXT: 0x34 R_RISCV_32_PCREL 0x0 # RELOC-NEXT: } # RELOC: Section ([[#]]) .rela.debug_rnglists 
{ # RELOC-NEXT: 0xD R_RISCV_64 .text.foo 0x0 -# RELOC-NEXT: 0x15 R_RISCV_SET_ULEB128 .L0 0x0 +# RELOC-NEXT: 0x15 R_RISCV_SET_ULEB128 0x0 # RELOC-NEXT: 0x15 R_RISCV_SUB_ULEB128 .text.foo 0x0 # RELOC-NEXT: 0x17 R_RISCV_64 .text.bar 0x0 # RELOC-NEXT: } # RELOC: Section ([[#]]) .rela.debug_line { -# RELOC: R_RISCV_ADD16 .L0 0x0 -# RELOC-NEXT: R_RISCV_SUB16 .L0 0x0 -# RELOC-NEXT: R_RISCV_ADD16 .L0 0x0 -# RELOC-NEXT: R_RISCV_SUB16 .L0 0x0 -# RELOC-NEXT: R_RISCV_ADD16 .L0 0x0 -# RELOC-NEXT: R_RISCV_SUB16 .L0 0x0 +# RELOC: R_RISCV_ADD16 0x0 +# RELOC-NEXT: R_RISCV_SUB16 0x0 +# RELOC-NEXT: R_RISCV_ADD16 0x0 +# RELOC-NEXT: R_RISCV_SUB16 0x0 +# RELOC-NEXT: R_RISCV_ADD16 0x0 +# RELOC-NEXT: R_RISCV_SUB16 0x0 # RELOC: } # RELOC: Hex dump of section '.eh_frame': diff --git a/llvm/test/MC/RISCV/attribute.s b/llvm/test/MC/RISCV/attribute.s index 56f0cb1daf176f..75b9c65ed1cc2f 100644 --- a/llvm/test/MC/RISCV/attribute.s +++ b/llvm/test/MC/RISCV/attribute.s @@ -24,3 +24,6 @@ .attribute priv_spec_revision, 0 # CHECK: attribute 12, 0 + +.attribute atomic_abi, 0 +# CHECK: attribute 14, 0 diff --git a/llvm/test/MC/RISCV/cfi-advance.s b/llvm/test/MC/RISCV/cfi-advance.s index b99af38f553aa0..c4af390be757da 100644 --- a/llvm/test/MC/RISCV/cfi-advance.s +++ b/llvm/test/MC/RISCV/cfi-advance.s @@ -1,27 +1,13 @@ # RUN: llvm-mc -filetype=obj -triple riscv32 %s -o %t.o -# RUN: llvm-readelf -sr %t.o | FileCheck %s +# RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=CHECK %s # RUN: llvm-dwarfdump --debug-frame %t.o 2>&1 \ # RUN: | FileCheck -check-prefix=CHECK-DWARFDUMP %s - -# CHECK: Relocation section '.rela.text1' at offset {{.*}} contains 1 entries: -# CHECK-NEXT: Offset Info Type Sym. Value Symbol's Name + Addend -# CHECK-NEXT: 00000000 00000313 R_RISCV_CALL_PLT 00000004 .L0 + 0 -# CHECK-EMPTY: -# CHECK-NEXT: Relocation section '.rela.eh_frame' at offset {{.*}} contains 3 entries: -# CHECK: Offset Info Type Sym. 
Value Symbol's Name + Addend -# CHECK-NEXT: 0000001c 00000139 R_RISCV_32_PCREL 00000000 .L0 + 0 -# CHECK-NEXT: 00000035 00000b35 R_RISCV_SET6 00010178 .L0 + 0 -# CHECK-NEXT: 00000035 00000934 R_RISCV_SUB6 0001016e .L0 + 0 -# CHECK-EMPTY: -# CHECK: Symbol table '.symtab' contains 15 entries: -# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name -# CHECK-NEXT: 0: 00000000 0 NOTYPE LOCAL DEFAULT UND -# CHECK-NEXT: 1: 00000000 0 NOTYPE LOCAL DEFAULT 2 .L0 {{$}} -# CHECK: 3: 00000004 0 NOTYPE LOCAL DEFAULT 2 .L0{{$}} -# CHECK: 9: 0001016e 0 NOTYPE LOCAL DEFAULT 2 .L0 {{$}} -# CHECK: 11: 00010178 0 NOTYPE LOCAL DEFAULT 2 .L0 {{$}} - +# CHECK: .rela.eh_frame { +# CHECK-NEXT: 0x1C R_RISCV_32_PCREL 0x0 +# CHECK-NEXT: 0x35 R_RISCV_SET6 0x0 +# CHECK-NEXT: 0x35 R_RISCV_SUB6 0x0 +# CHECK-NEXT: } # CHECK-DWARFDUMP: DW_CFA_advance_loc1: 104 # CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 # CHECK-DWARFDUMP-NEXT: DW_CFA_advance_loc2: 259 @@ -37,9 +23,6 @@ test: .cfi_startproc nop -## This looks similar to fake label names ".L0 ". Even if this is ".L0 ", -## the assembler will not conflate it with fake labels. 
-.L0: .zero 100, 0x90 .cfi_def_cfa_offset 8 nop @@ -53,6 +36,3 @@ test: .cfi_def_cfa_offset 8 nop .cfi_endproc - -.section .text1,"ax" -call .L0 diff --git a/llvm/test/MC/RISCV/fde-reloc.s b/llvm/test/MC/RISCV/fde-reloc.s index 81ec426c8b6165..1db8929e074703 100644 --- a/llvm/test/MC/RISCV/fde-reloc.s +++ b/llvm/test/MC/RISCV/fde-reloc.s @@ -12,7 +12,7 @@ func: .cfi_endproc # CHECK: Section (4) .rela.eh_frame { -# CHECK-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 +# CHECK-NEXT: 0x1C R_RISCV_32_PCREL 0x0 # CHECK-NEXT: } # CHECK: Hex dump of section '.eh_frame': # CHECK-NEXT: 0x00000000 10000000 00000000 017a5200 017c0101 diff --git a/llvm/test/MC/RISCV/invalid-attribute.s b/llvm/test/MC/RISCV/invalid-attribute.s index 1d732af83cda35..2ebf7ddc9aff85 100644 --- a/llvm/test/MC/RISCV/invalid-attribute.s +++ b/llvm/test/MC/RISCV/invalid-attribute.s @@ -33,3 +33,6 @@ .attribute arch, 30 # CHECK: [[@LINE-1]]:18: error: expected string constant + +.attribute atomic_abi, "16" +# CHECK: [[@LINE-1]]:24: error: expected numeric constant diff --git a/llvm/test/MC/RISCV/scoped-relaxation.s b/llvm/test/MC/RISCV/scoped-relaxation.s index 56394fd8053282..0b797ee5aca5eb 100644 --- a/llvm/test/MC/RISCV/scoped-relaxation.s +++ b/llvm/test/MC/RISCV/scoped-relaxation.s @@ -9,7 +9,7 @@ .dword function - . # CHECK: 0x0 R_RISCV_ADD64 function 0x0 -# CHECK-NEXT: 0x0 R_RISCV_SUB64 .L0 0x0 +# CHECK-NEXT: 0x0 R_RISCV_SUB64 0x0 # Relaxed reference, this will resolve to a pair of `RISCV_ADD64` and # `RISCV_SUB64` relocation. @@ -19,7 +19,7 @@ .option pop # CHECK: 0x8 R_RISCV_ADD64 function 0x0 -# CHECK-NEXT: 0x8 R_RISCV_SUB64 .L0 0x0 +# CHECK-NEXT: 0x8 R_RISCV_SUB64 0x0 # Unrelaxed reference, this will resolve to a pair of `RISCV_ADD64` and # `RISCV_SUB64` relocation due to relaxation being sticky to the file. 
@@ -29,6 +29,6 @@ .option pop # CHECK: 0x10 R_RISCV_ADD64 function 0x0 -# CHECK-NEXT: 0x10 R_RISCV_SUB64 .L0 0x0 +# CHECK-NEXT: 0x10 R_RISCV_SUB64 0x0 # CHECK: } diff --git a/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td b/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td index 7c9df02ebd87c6..fc8abc6fbc547e 100644 --- a/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td +++ b/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td @@ -1,4 +1,6 @@ // RUN: llvm-tblgen -warn-on-skipped-patterns -gen-global-isel -I %p/../../include %s -I %p/Common -o /dev/null 2>&1 | FileCheck %s +// RUN: llvm-tblgen -warn-on-skipped-patterns -gen-global-isel -I %p/../../include %s -I %p/Common -o /dev/null -DIGNORE 2>&1 | FileCheck --allow-empty --check-prefix=IGNORED %s + include "llvm/Target/Target.td" include "GlobalISelEmitterCommon.td" @@ -23,6 +25,10 @@ def INSN : I<(outs GPR32:$dst), (ins GPR32:$src1, complex:$src2), []>; //===- Bail out when we define a variable twice wrt complex suboperands. -===// +#ifdef IGNORE +let GISelShouldIgnore = 1 in +#endif +// IGNORED-NOT: warning: Skipped pattern: Error: {{.*}} // CHECK: warning: Skipped pattern: Error: Complex suboperand x referenced by different operands: complex_rr:x:y and complex_rr:x:z. 
def : Pat<(add (complex_rr GPR32:$x, GPR32:$y), (complex_rr GPR32:$x, GPR32:$z)), diff --git a/llvm/test/TableGen/riscv-target-def.td b/llvm/test/TableGen/riscv-target-def.td index ab589b31192f39..b23c7e4d40198b 100644 --- a/llvm/test/TableGen/riscv-target-def.td +++ b/llvm/test/TableGen/riscv-target-def.td @@ -2,8 +2,9 @@ include "llvm/Target/Target.td" -class RISCVExtension implies = [], +class RISCVExtension implies = [], + string fieldname = !subst("Feature", "Has", NAME), string value = "true"> : SubtargetFeature { int MajorVersion = major; @@ -11,18 +12,36 @@ class RISCVExtension implies = [], + string fieldname = !subst("Feature", "Has", NAME), + string value = "true"> + : RISCVExtension<"experimental-"#name, major, minor, desc, implies, + fieldname, value> { + let Experimental = true; +} + def FeatureStdExtI - : RISCVExtension<"i", 2, 1, "HasStdExtI", + : RISCVExtension<"i", 2, 1, "'I' (Base Integer Instruction Set)">; def FeatureStdExtZicsr - : RISCVExtension<"zicsr", 2, 0, "HasStdExtZicsr", + : RISCVExtension<"zicsr", 2, 0, "'zicsr' (CSRs)">; def FeatureStdExtZifencei - : RISCVExtension<"zifencei", 2, 0, "HasStdExtZifencei", + : RISCVExtension<"zifencei", 2, 0, "'Zifencei' (fence.i)">; +def FeatureStdExtF + : RISCVExtension<"f", 2, 2, + "'F' (Single-Precision Floating-Point)", + [FeatureStdExtZicsr]>; + +def FeatureStdExtZidummy + : RISCVExperimentalExtension<"zidummy", 0, 1, + "Dummy">; + def Feature32Bit : SubtargetFeature<"32bit", "IsRV32", "true", "Implements RV32">; def Feature64Bit @@ -75,22 +94,49 @@ def ROCKET_RV64 : RISCVProcessorModel<"rocket-rv64", def ROCKET : RISCVTuneProcessorModel<"rocket", NoSchedModel>; -// CHECK: #ifndef PROC -// CHECK: #define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGNED_ACCESS) -// CHECK: #endif +// CHECK: #ifdef GET_SUPPORTED_EXTENSIONS +// CHECK-NEXT: #undef GET_SUPPORTED_EXTENSIONS + +// CHECK: static const RISCVSupportedExtension SupportedExtensions[] = { +// CHECK-NEXT: {"f", {2, 2}}, +// CHECK-NEXT: {"i", {2, 
1}}, +// CHECK-NEXT: {"zicsr", {2, 0}}, +// CHECK-NEXT: {"zifencei", {2, 0}}, +// CHECK-NEXT: }; + +// CHECK: static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { +// CHECK-NEXT: {"zidummy", {0, 1}}, +// CHECK-NEXT: }; + +// CHECK: #endif // GET_SUPPORTED_EXTENSIONS + +// CHECK: #ifdef GET_IMPLIED_EXTENSIONS +// CHECK-NEXT: #undef GET_IMPLIED_EXTENSIONS + +// CHECK: static const char *ImpliedExtsF[] = {"zicsr"}; + +// CHECK: static constexpr ImpliedExtsEntry ImpliedExts[] = { +// CHECK-NEXT: { {"f"}, {ImpliedExtsF} }, +// CHECK-NEXT: }; + +// CHECK: #endif // GET_IMPLIED_EXTENSIONS + +// CHECK: #ifndef PROC +// CHECK-NEXT: #define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGNED_ACCESS) +// CHECK-NEXT: #endif -// CHECK: PROC(GENERIC_RV32, {"generic-rv32"}, {"rv32i2p1"}, 0) -// CHECK: PROC(GENERIC_RV64, {"generic-rv64"}, {"rv64i2p1"}, 0) -// CHECK: PROC(ROCKET_RV32, {"rocket-rv32"}, {"rv32i2p1_zicsr2p0_zifencei2p0"}, 0) -// CHECK: PROC(ROCKET_RV64, {"rocket-rv64"}, {"rv64i2p1_zicsr2p0_zifencei2p0"}, 0) +// CHECK: PROC(GENERIC_RV32, {"generic-rv32"}, {"rv32i2p1"}, 0) +// CHECK-NEXT: PROC(GENERIC_RV64, {"generic-rv64"}, {"rv64i2p1"}, 0) +// CHECK-NEXT: PROC(ROCKET_RV32, {"rocket-rv32"}, {"rv32i2p1_zicsr2p0_zifencei2p0"}, 0) +// CHECK-NEXT: PROC(ROCKET_RV64, {"rocket-rv64"}, {"rv64i2p1_zicsr2p0_zifencei2p0"}, 0) // CHECK: #undef PROC -// CHECK: #ifndef TUNE_PROC -// CHECK: #define TUNE_PROC(ENUM, NAME) -// CHECK: #endif +// CHECK: #ifndef TUNE_PROC +// CHECK-NEXT: #define TUNE_PROC(ENUM, NAME) +// CHECK-NEXT: #endif // CHECK: TUNE_PROC(GENERIC, "generic") -// CHECK: TUNE_PROC(ROCKET, "rocket") +// CHECK-NEXT: TUNE_PROC(ROCKET, "rocket") // CHECK: #undef TUNE_PROC diff --git a/llvm/test/TableGen/simplify-patfrag.td b/llvm/test/TableGen/simplify-patfrag.td index 904c29696a6e2c..fbb6f97f286311 100644 --- a/llvm/test/TableGen/simplify-patfrag.td +++ b/llvm/test/TableGen/simplify-patfrag.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen -gen-dag-isel -I 
%p/../../include %s 2>&1 | FileCheck %s +// RUN: llvm-tblgen -gen-dag-isel -I %p/../../include -DIGNORE %s 2>&1 | FileCheck %s include "llvm/Target/Target.td" @@ -29,6 +30,10 @@ def anyconvert : PatFrags<(ops node:$src), [(bitconvert node:$src), (specialconvert node:$src)]>; +#ifdef IGNORE +// Ensure ShouldIgnore does not disable records in dag isel emitter +let GISelShouldIgnore = 1 in +#endif // And a rule that matches that PatFrag and turns it into i2f def : Pat<(f32 (anyconvert (i32 GPR:$val))), (i2f GPR:$val)>; diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll new file mode 100644 index 00000000000000..fd5a2044db48f3 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s + +; Make sure atomic loads are not bitcasted and lose metadata + +define float @load_atomic_f32_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_global_agent( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr addrspace(1) %ptr syncscope("agent") 
seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_flat_system(ptr %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_flat_system( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_flat_agent(ptr %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_flat_agent( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define half @load_atomic_f16_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define half @load_atomic_f16_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half +; CHECK-NEXT: ret half [[TMP2]] +; + %ld = load atomic half, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret half %ld +} + +define half @load_atomic_f16_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define half @load_atomic_f16_global_agent( +; CHECK-SAME: ptr 
addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half +; CHECK-NEXT: ret half [[TMP2]] +; + %ld = load atomic half, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret half %ld +} + +define half @load_atomic_f16_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define half @load_atomic_f16_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half +; CHECK-NEXT: ret half [[TMP2]] +; + %ld = load atomic half, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0 + ret half %ld +} + +define bfloat @load_atomic_bf16_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr addrspace(1) %ptr seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define bfloat @load_atomic_bf16_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_global_agent( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define bfloat @load_atomic_bf16_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = 
load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define bfloat @load_atomic_bf16_flat(ptr %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_flat( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr %ptr seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define double @load_atomic_f64_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_global_agent( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(3) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: 
ret double [[TMP2]] +; + %ld = load atomic double, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_flat_system(ptr %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_flat_system( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr %ptr seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_flat_agent(ptr %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_flat_agent( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +!0 = !{} + + diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll new file mode 100644 index 00000000000000..db0c3a20e62f48 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s + +define void @store_atomic_f32_global_system(float %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_global_system( +; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} 
+ +define void @store_atomic_f32_global_agent(float %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_global_agent( +; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f32_local(float %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_local( +; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f32_flat(float %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_flat( +; CHECK-SAME: float [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_global_system(half %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f16_global_system( +; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_global_agent(half %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void 
@store_atomic_f16_global_agent( +; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_local(half %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_f16_local( +; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_flat(half %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_f16_flat( +; CHECK-SAME: half [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_global_system(bfloat %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_global_system( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_global_agent(bfloat %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_global_agent( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_local(bfloat %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_local( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_flat(bfloat %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_flat( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} +define void @store_atomic_f64_global_system(double %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_global_system( +; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f64_global_agent(double %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_global_agent( +; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(1) 
[[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f64_local(double %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_local( +; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f64_flat(double %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_flat( +; CHECK-SAME: double [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr %ptr seq_cst, align 8, !some.unknown.md !0 + ret void +} + +!0 = !{} diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/noop-copy-sink.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/noop-copy-sink.ll new file mode 100644 index 00000000000000..55cde6c1431fe3 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/noop-copy-sink.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=riscv64 %s \ +; RUN: | FileCheck --check-prefixes=CHECK %s + +define i16 @sink_trunc1(i64 %a) { +; CHECK-LABEL: @sink_trunc1( +; CHECK-NEXT: fnend: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[A:%.*]] to i16 +; CHECK-NEXT: ret i16 [[TMP0]] +; + %trunc = trunc i64 %a to i16 + br label %fnend + +fnend: + ret i16 %trunc +} + +; The flags on the original trunc should be preserved. 
+define i16 @sink_trunc2(i64 %a) { +; CHECK-LABEL: @sink_trunc2( +; CHECK-NEXT: fnend: +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[A:%.*]] to i16 +; CHECK-NEXT: ret i16 [[TMP0]] +; + %trunc = trunc nuw nsw i64 %a to i16 + br label %fnend + +fnend: + ret i16 %trunc +} diff --git a/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll b/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll index b4c24715037bca..d0095231a30f93 100644 --- a/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll +++ b/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll @@ -1,22 +1,22 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; ; RUN: opt -passes="ipsccp" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC ; RUN: opt -passes="ipsccp" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-iterations=16 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) { -; FUNCSPEC-LABEL: define i64 @bar( +; FUNCSPEC-LABEL: define range(i64 4, 13) i64 @bar( ; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { ; FUNCSPEC-NEXT: entry: -; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]] -; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]] +; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 
[[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) +; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) ; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]] ; FUNCSPEC-NEXT: ret i64 [[ADD]] ; -; NOFUNCSPEC-LABEL: define i64 @bar( +; NOFUNCSPEC-LABEL: define range(i64 4, 13) i64 @bar( ; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { ; NOFUNCSPEC-NEXT: entry: -; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]] -; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]] +; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) +; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) ; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]] ; NOFUNCSPEC-NEXT: ret i64 [[ADD]] ; @@ -28,6 +28,50 @@ entry: } define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) { +; NOFUNCSPEC-LABEL: define internal range(i64 2, 7) i64 @foo( +; NOFUNCSPEC-SAME: i64 [[N:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { +; NOFUNCSPEC-NEXT: entry: +; NOFUNCSPEC-NEXT: br i1 [[C1]], label [[L1:%.*]], label [[L9:%.*]] +; NOFUNCSPEC: l1: +; 
NOFUNCSPEC-NEXT: [[PHI1:%.*]] = phi i64 [ [[N]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[L2:%.*]] ] +; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI1]], 1 +; NOFUNCSPEC-NEXT: br i1 [[C2]], label [[L1_5:%.*]], label [[EXIT:%.*]] +; NOFUNCSPEC: l1_5: +; NOFUNCSPEC-NEXT: br i1 [[C3]], label [[L1_75:%.*]], label [[L6:%.*]] +; NOFUNCSPEC: l1_75: +; NOFUNCSPEC-NEXT: br i1 [[C4]], label [[L2]], label [[L3:%.*]] +; NOFUNCSPEC: l2: +; NOFUNCSPEC-NEXT: [[PHI2]] = phi i64 [ [[PHI1]], [[L1_75]] ], [ [[PHI3:%.*]], [[L3]] ] +; NOFUNCSPEC-NEXT: br label [[L1]] +; NOFUNCSPEC: l3: +; NOFUNCSPEC-NEXT: [[PHI3]] = phi i64 [ [[PHI1]], [[L1_75]] ], [ [[PHI4:%.*]], [[L4:%.*]] ] +; NOFUNCSPEC-NEXT: br label [[L2]] +; NOFUNCSPEC: l4: +; NOFUNCSPEC-NEXT: [[PHI4]] = phi i64 [ [[PHI5:%.*]], [[L5:%.*]] ], [ [[PHI6:%.*]], [[L6]] ] +; NOFUNCSPEC-NEXT: br i1 [[C5]], label [[L3]], label [[L6]] +; NOFUNCSPEC: l5: +; NOFUNCSPEC-NEXT: [[PHI5]] = phi i64 [ [[PHI6]], [[L6_5:%.*]] ], [ [[PHI7:%.*]], [[L7:%.*]] ] +; NOFUNCSPEC-NEXT: br label [[L4]] +; NOFUNCSPEC: l6: +; NOFUNCSPEC-NEXT: [[PHI6]] = phi i64 [ [[PHI4]], [[L4]] ], [ [[PHI1]], [[L1_5]] ] +; NOFUNCSPEC-NEXT: br i1 [[C6]], label [[L4]], label [[L6_5]] +; NOFUNCSPEC: l6_5: +; NOFUNCSPEC-NEXT: br i1 [[C7]], label [[L5]], label [[L8:%.*]] +; NOFUNCSPEC: l7: +; NOFUNCSPEC-NEXT: [[PHI7]] = phi i64 [ [[PHI9:%.*]], [[L9]] ], [ [[PHI8:%.*]], [[L8]] ] +; NOFUNCSPEC-NEXT: br i1 [[C8]], label [[L5]], label [[L8]] +; NOFUNCSPEC: l8: +; NOFUNCSPEC-NEXT: [[PHI8]] = phi i64 [ [[PHI6]], [[L6_5]] ], [ [[PHI7]], [[L7]] ] +; NOFUNCSPEC-NEXT: br i1 [[C9]], label [[L7]], label [[L9]] +; NOFUNCSPEC: l9: +; NOFUNCSPEC-NEXT: [[PHI9]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[PHI8]], [[L8]] ] +; NOFUNCSPEC-NEXT: [[SUB:%.*]] = sub nuw nsw i64 [[PHI9]], 1 +; NOFUNCSPEC-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[SUB]], 2 +; NOFUNCSPEC-NEXT: br i1 [[C10]], label [[L7]], label [[EXIT]] +; NOFUNCSPEC: exit: +; NOFUNCSPEC-NEXT: [[RES:%.*]] = phi i64 [ 2, [[L1]] ], [ [[MUL]], [[L9]] ] 
+; NOFUNCSPEC-NEXT: ret i64 [[RES]] +; entry: br i1 %c1, label %l1, label %l9 diff --git a/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll b/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll index b9481baae60b9e..a576d9aa32e140 100644 --- a/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll @@ -49,10 +49,10 @@ entry: ; Check if specialisation on the address of a non-const global variable ; is not allowed, then it is not performed. -; NO-GLOBALS-LABEL: define internal i32 @g() +; NO-GLOBALS-LABEL: define internal range(i32 -2147483646, -2147483648) i32 @g() ; NO-GLOBALS: call i32 @f(ptr @G) -; NO-GLOBALS-LABEL: define i32 @h0(ptr %p) +; NO-GLOBALS-LABEL: define range(i32 -2147483646, -2147483648) i32 @h0(ptr %p) ; NO-GLOBALS:call i32 @g() ; NO-GLOBALS-LABEL: define i32 @h1() @@ -64,10 +64,10 @@ entry: ; Check if specialisation on the address of a non-const global variable ; is allowed, then it is performed where possible. 
-; GLOBALS-LABEL: define internal i32 @g() +; GLOBALS-LABEL: define internal range(i32 -2147483646, -2147483648) i32 @g() ; GLOBALS: call i32 @f.specialized.2() -; GLOBALS-LABEL: define i32 @h0(ptr %p) +; GLOBALS-LABEL: define range(i32 -2147483646, -2147483648) i32 @h0(ptr %p) ; GLOBALS: call i32 @g() ; GLOBALS-LABEL: define i32 @h1() diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll index f107ffe0ec7ebf..3eae3dc261fb2a 100644 --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -71,10 +71,10 @@ entry: ; CHECK-LIT-LABEL: define i32 @f1 ; CHECK-LIT: call i32 @neg.specialized.[[#B:]] -; CHECK-LIT-LABEL: define i32 @g0 +; CHECK-LIT-LABEL: define range(i32 -2147483647, -2147483648) i32 @g0 ; CHECK-LIT: call i32 @add.specialized.[[#C:]] -; CHECK-LIT-LABEL: define i32 @g1 +; CHECK-LIT-LABEL: define range(i32 -2147483647, -2147483648) i32 @g1 ; CHECK-LIT: call i32 @add.specialized.[[#D:]] ; CHECK-LIT-LABEL: define float @h0 diff --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll index 236821d8ba4c02..f439d4da6080c4 100644 --- a/llvm/test/Transforms/InstCombine/array.ll +++ b/llvm/test/Transforms/InstCombine/array.ll @@ -108,3 +108,163 @@ entry: store i32 %b, ptr %gep ret void } + +define ptr @gep_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + 
%a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_add_nsw_not_nonneg1(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_not_nonneg1( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_add_nsw_not_nonneg2(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_not_nonneg2( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_not_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_not_inbounds_add_nsw_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] 
= getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_add_not_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_not_nsw_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_inbounds_sext_add_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + +define ptr @gep_inbounds_sext_add_not_nonneg_1(ptr %ptr, i32 %a) { +; 
CHECK-LABEL: define ptr @gep_inbounds_sext_add_not_nonneg_1( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i32 %a, -10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + +define ptr @gep_inbounds_sext_add_not_nonneg_2(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_inbounds_sext_add_not_nonneg_2( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %add = add nsw i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + +define ptr @gep_not_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_not_inbounds_sext_add_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr i32, ptr %ptr, i64 %idx + ret ptr %gep +} diff --git a/llvm/test/Transforms/InstCombine/fneg.ll 
b/llvm/test/Transforms/InstCombine/fneg.ll index 7c9289c447113f..3c4088832feaaa 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -980,7 +980,7 @@ define float @fneg_ldexp_contract(float %x, i32 %n) { define float @fneg_ldexp_metadata(float %x, i32 %n) { ; CHECK-LABEL: @fneg_ldexp_metadata( ; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[X:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[N:%.*]]), !arst !0 +; CHECK-NEXT: [[NEG:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[N:%.*]]), !arst [[META0:![0-9]+]] ; CHECK-NEXT: ret float [[NEG]] ; %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %n), !arst !0 @@ -988,4 +988,125 @@ define float @fneg_ldexp_metadata(float %x, i32 %n) { ret float %neg } +define float @test_fneg_select_constants(i1 %cond) { +; CHECK-LABEL: @test_fneg_select_constants( +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float 0.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float -0.0 + %neg = fneg float %sel1 + ret float %neg +} + +define <2 x float> @test_fneg_vec(<2 x i1> %cond) { +; CHECK-LABEL: @test_fneg_vec( +; CHECK-NEXT: [[NEG:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: ret <2 x float> [[NEG]] +; + %sel1 = select <2 x i1> %cond, <2 x float> , <2 x float> + %neg = fneg <2 x float> %sel1 + ret <2 x float> %neg +} + +define float @test_fneg_select_var_constant(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_var_constant( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float [[X_NEG]], float 0.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float %x, float -0.0 + %neg = fneg float %sel1 + ret float %neg +} + +; nsz can be preserved. 
+ +define float @test_fneg_select_var_constant_fmf1(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_var_constant_fmf1( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select nnan ninf nsz i1 [[COND:%.*]], float [[X_NEG]], float -1.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select nnan ninf nsz i1 %cond, float %x, float 1.0 + %neg = fneg float %sel1 + ret float %neg +} + +define float @test_fneg_select_var_constant_fmf2(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_var_constant_fmf2( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg nnan ninf nsz float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select nnan ninf nsz i1 [[COND:%.*]], float [[X_NEG]], float -1.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float %x, float 1.0 + %neg = fneg nnan ninf nsz float %sel1 + ret float %neg +} + +define float @test_fneg_select_constant_var(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_constant_var( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[X_NEG]] +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float %x + %neg = fneg float %sel1 + ret float %neg +} + +; Make sure nabs is generated. 
+ +define float @test_fneg_select_abs(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_abs( +; CHECK-NEXT: [[ABSX:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[ABSX_NEG:%.*]] = fneg float [[ABSX]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[ABSX_NEG]] +; CHECK-NEXT: ret float [[NEG]] +; + %absx = call float @llvm.fabs.f32(float %x) + %sel1 = select i1 %cond, float 0.0, float %absx + %neg = fneg float %sel1 + ret float %neg +} + +define float @test_fneg_fabs_select(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_fabs_select( +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[DOTNEG]] +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float %x + %abs = call float @llvm.fabs.f32(float %sel1) + %neg = fneg float %abs + ret float %neg +} + +define float @test_fneg_select_constant_var_multiuse(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_constant_var_multiuse( +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[X:%.*]] +; CHECK-NEXT: call void @use(float [[SEL1]]) +; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SEL1]] +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float %x + call void @use(float %sel1) + %neg = fneg float %sel1 + ret float %neg +} + +; Don't break fmax idioms. 
+ +define float @test_fneg_select_maxnum(float %x) { +; CHECK-LABEL: @test_fneg_select_maxnum( +; CHECK-NEXT: [[SEL1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SEL1]] +; CHECK-NEXT: ret float [[NEG]] +; + %cmp1 = fcmp ogt float %x, 1.0 + %sel1 = select nnan nsz i1 %cmp1, float %x, float 1.0 + %neg = fneg float %sel1 + ret float %neg +} + !0 = !{} diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll new file mode 100644 index 00000000000000..7b7c6fba699c21 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll @@ -0,0 +1,292 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +declare void @use64(i64) +declare void @useptr(ptr) + +define ptr @test_zero(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_zero( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_nonzero(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_nonzero( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 4 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 2 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_or_disjoint(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_or_disjoint( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, 
ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = or disjoint i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_zero_multiuse_index(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_zero_multiuse_index( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1 +; CHECK-NEXT: call void @use64(i64 [[INDEX]]) +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 1 + call void @use64(i64 %index) + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_zero_multiuse_ptr(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_zero_multiuse_ptr( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: call void @useptr(ptr [[P1]]) +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + call void @useptr(ptr %p1) + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) { +; CHECK-LABEL: define ptr @test_zero_sext_add_nsw( +; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add nsw i32 %a, 1 + %p2 = getelementptr i32, ptr %p1, i32 %index + ret ptr %p2 +} + +define ptr @test_zero_trunc_add(ptr %base, 
i128 %a) { +; CHECK-LABEL: define ptr @test_zero_trunc_add( +; CHECK-SAME: ptr [[BASE:%.*]], i128 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[A]] to i64 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i128 %a, 1 + %p2 = getelementptr i32, ptr %p1, i128 %index + ret ptr %p2 +} + +define ptr @test_non_i8(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_non_i8( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: ret ptr [[TMP0]] +; +entry: + %p1 = getelementptr i16, ptr %base, i64 -4 + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_non_const(ptr %base, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @test_non_const( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 %b + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_too_many_indices(ptr %base, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @test_too_many_indices( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[B]] +; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 1, i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 %b + %index = add i64 %a, 
1 + %p2 = getelementptr [8 x i32], ptr %p1, i64 1, i64 %index + ret ptr %p2 +} + +define ptr @test_wrong_op(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_wrong_op( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = xor i64 [[A]], 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = xor i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_sext_add_without_nsw(ptr %base, i32 %a) { +; CHECK-LABEL: define ptr @test_sext_add_without_nsw( +; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = add i32 [[A]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i32 %a, 1 + %p2 = getelementptr i32, ptr %p1, i32 %index + ret ptr %p2 +} + +define ptr @test_or_without_disjoint(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_or_without_disjoint( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = or i64 [[A]], 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = or i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_smul_overflow(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_smul_overflow( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 
-12 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: ret ptr [[TMP0]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 9223372036854775806 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_sadd_overflow(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_sadd_overflow( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -9223372036854775808 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: ret ptr [[TMP0]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 9223372036854775804 + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_nonzero_multiuse_index(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_nonzero_multiuse_index( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 2 +; CHECK-NEXT: call void @use64(i64 [[INDEX]]) +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 2 + call void @use64(i64 %index) + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_nonzero_multiuse_ptr(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_nonzero_multiuse_ptr( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: call void @useptr(ptr [[P1]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + call void @useptr(ptr %p1) + %index = add i64 
%a, 2 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_scalable(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_scalable( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[TMP0]], i64 1 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 1 + %p2 = getelementptr <vscale x 4 x i32>, ptr %p1, i64 %index + ret ptr %p2 +} diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll index a7d01b4f824db0..e4fb7764ba9e53 100644 --- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll +++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll @@ -364,8 +364,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs(i1 %cond, define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[FABS]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0xFFF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -376,8 +376,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs(i1 %cond, f define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -400,8 +400,8 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives__fabs_ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_no_positives__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[FABS]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -435,8 +435,8 @@ define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_na define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: ret float [[X_NEG]] ; %select = select i1 %cond, float %x, float 0xFFF0000000000000 %fneg = fneg float %select @@ -447,8 +447,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs(i1 %cond, f define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___fneg_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_nonegatives_noinf___fneg_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[X_NEG:%.*]] 
= fneg float [[X]] +; CHECK-NEXT: ret float [[X_NEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fneg = fneg float %select @@ -459,8 +459,8 @@ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___fneg_select_ninf_lhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_nonegatives_noinf___fneg_select_ninf_lhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: ret float [[X_NEG]] ; %select = select i1 %cond, float 0xFFF0000000000000, float %x %fneg = fneg float %select @@ -470,8 +470,8 @@ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___ define nofpclass(pzero psub pnorm pinf) float @ret_nofpclass_nopositives___fneg_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives___fneg_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[SELECT]] +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: [[FNEG:%.*]] = select i1 [[COND]], float [[X_NEG]], float 0xFFF0000000000000 ; CHECK-NEXT: ret float [[FNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -483,9 +483,9 @@ define nofpclass(pzero psub pnorm pinf) float @ret_nofpclass_nopositives___fneg_ define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[FNEG:%.*]] = fneg float 
[[FABS]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: ret float [[DOTNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -497,9 +497,9 @@ define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs(i1 %co define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_nofpclass_nonegatives__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FABS]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: ret float [[DOTNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -535,8 +535,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_unknown_select_pinf_rhs define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %copysign = call float @llvm.copysign.f32(float %select, float 1.0) @@ -547,8 +547,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_negative_select_pinf_rh ; CHECK-LABEL: define nofpclass(inf) float 
@ret_nofpclass_inf__copysign_negative_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] -; CHECK-NEXT: ret float [[COPYSIGN]] +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: ret float [[DOTNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %copysign = call float @llvm.copysign.f32(float %select, float -1.0) @@ -627,8 +627,8 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_non define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives__copysign_fabs_select_pinf_rhs(i1 %cond, float %x, float %sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives__copysign_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs.sign = call float @llvm.fabs.f32(float %sign) @@ -678,9 +678,9 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives__copys define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_no_positives__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND]], float [[TMP2]], float 0x7FF0000000000000 -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: 
[[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = select i1 [[COND]], float [[DOTNEG]], float 0xFFF0000000000000 ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -705,9 +705,9 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives_no define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_nonan__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_nofpclass_no_positives_nonan__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND]], float [[TMP2]], float 0x7FF0000000000000 -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = select i1 [[COND]], float [[DOTNEG]], float 0xFFF0000000000000 ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 56dd9c6a879981..32ed4a787e9262 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1123,7 +1123,8 @@ define i64 @test58(ptr %foo, i64 %i, i64 %j) { define i64 @test59(ptr %foo, i64 %i) { ; CHECK-LABEL: @test59( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], ptr [[FOO:%.*]], i64 0, i64 42, i64 [[I:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[FOO:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4200 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4200 ; CHECK-NEXT: store ptr [[GEP1]], ptr 
@dummy_global1, align 8 ; CHECK-NEXT: store ptr [[GEP2]], ptr @dummy_global2, align 8 @@ -1142,13 +1143,12 @@ define i64 @test59(ptr %foo, i64 %i) { define i64 @test60(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test60( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], ptr [[FOO:%.*]], i64 0, i64 [[J:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4200 -; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint ptr [[GEP1]] to i64 -; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint ptr [[GEP2]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[J:%.*]], 100 +; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add nsw i64 [[GEP1_IDX]], [[I:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 [[GEP1_OFFS]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = add nsw i64 [[GEP1_OFFS]], -4200 ; CHECK-NEXT: store ptr [[GEP1]], ptr @dummy_global1, align 8 -; CHECK-NEXT: ret i64 [[SUB]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; ; gep1 has a non-constant index and more than one uses. Shouldn't duplicate the arithmetic. 
%gep1 = getelementptr inbounds [100 x [100 x i8]], ptr %foo, i64 0, i64 %j, i64 %i @@ -1162,13 +1162,12 @@ define i64 @test60(ptr %foo, i64 %i, i64 %j) { define i64 @test61(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test61( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 4200 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], ptr [[FOO]], i64 0, i64 [[J:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint ptr [[GEP1]] to i64 -; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint ptr [[GEP2]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]] +; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[J:%.*]], 100 +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add nsw i64 [[GEP2_IDX]], [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 [[GEP2_OFFS]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 4200, [[GEP2_OFFS]] ; CHECK-NEXT: store ptr [[GEP2]], ptr @dummy_global2, align 8 -; CHECK-NEXT: ret i64 [[SUB]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; ; gep2 has a non-constant index and more than one uses. Shouldn't duplicate the arithmetic. 
%gep1 = getelementptr inbounds [100 x [100 x i8]], ptr %foo, i64 0, i64 42, i64 0 @@ -1186,11 +1185,8 @@ define i64 @test_sub_ptradd_multiuse(ptr %p, i64 %idx1, i64 %idx2) { ; CHECK-LABEL: @test_sub_ptradd_multiuse( ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[IDX1:%.*]] ; CHECK-NEXT: call void @use.ptr(ptr [[P1]]) -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX2:%.*]] -; CHECK-NEXT: [[P1_INT:%.*]] = ptrtoint ptr [[P1]] to i64 -; CHECK-NEXT: [[P2_INT:%.*]] = ptrtoint ptr [[P2]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[P1_INT]], [[P2_INT]] -; CHECK-NEXT: ret i64 [[SUB]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[IDX1]], [[IDX2:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds i8, ptr %p, i64 %idx1 call void @use.ptr(ptr %p1) diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll b/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll index 4c88f4acc12f16..2795333effd76b 100644 --- a/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll +++ b/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll @@ -109,8 +109,8 @@ attributes #6 = { noreturn nounwind } !11 = !{} !12 = !{!"branch_weights", i32 1048575, i32 1} ; CHECK: Function Attrs: minsize mustprogress nofree norecurse nosync nounwind optsize willreturn memory(none) -; CHECK-LABEL: define dso_local noundef i32 @_Z9nothrow_ei -; CHECK-SAME: (i32 noundef [[NUM:%.*]]) #[[ATTR0:[0-9]+]] !type !4 !type !5 !type !6 { +; CHECK-LABEL: define dso_local noundef range(i32 0, 2) i32 @_Z9nothrow_ei +; CHECK-SAME: (i32 noundef [[NUM:%.*]]) #[[ATTR0:[0-9]+]] !type [[META4:![0-9]+]] !type [[META5:![0-9]+]] !type [[META6:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i32 [[NUM]], 0 ; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 @@ -118,8 +118,8 @@ attributes #6 = { noreturn nounwind } ; ; ; CHECK: Function Attrs: minsize mustprogress nofree norecurse 
nosync nounwind optsize willreturn memory(write, argmem: none, inaccessiblemem: none) -; CHECK-LABEL: define dso_local noundef i32 @_Z10call_catchi -; CHECK-SAME: (i32 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !type !4 !type !5 !type !6 { +; CHECK-LABEL: define dso_local noundef range(i32 0, 2) i32 @_Z10call_catchi +; CHECK-SAME: (i32 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !type [[META4]] !type [[META5]] !type [[META6]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store ptr @_Z9nothrow_ei.cfi_jt, ptr @catch_ptr, align 8, !tbaa [[TBAA7:![0-9]+]] ; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp ne i32 [[NUM]], 0 @@ -131,17 +131,17 @@ attributes #6 = { noreturn nounwind } ; CHECK-LABEL: define weak_odr hidden void @__cfi_check_fail ; CHECK-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq ptr [[TMP0]], null, !nosanitize !11 -; CHECK-NEXT: br i1 [[DOTNOT]], label [[TRAP:%.*]], label [[CONT:%.*]], !nosanitize !11 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq ptr [[TMP0]], null, !nosanitize [[META11:![0-9]+]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[TRAP:%.*]], label [[CONT:%.*]], !nosanitize [[META11]] ; CHECK: trap: -; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR5:[0-9]+]], !nosanitize !11 -; CHECK-NEXT: unreachable, !nosanitize !11 +; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR6:[0-9]+]], !nosanitize [[META11]] +; CHECK-NEXT: unreachable, !nosanitize [[META11]] ; CHECK: cont: -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 4, !nosanitize !11 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 4, !nosanitize [[META11]] ; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[TMP2]], 5 ; CHECK-NEXT: br i1 [[SWITCH]], label [[TRAP]], label [[CONT6:%.*]] ; CHECK: cont6: -; CHECK-NEXT: ret void, !nosanitize !11 +; CHECK-NEXT: ret void, !nosanitize [[META11]] ; ; ; CHECK-LABEL: define weak void @__cfi_check @@ -153,8 +153,8 @@ attributes #6 
= { noreturn nounwind } ; ; CHECK: Function Attrs: naked nocf_check noinline nounwind ; CHECK-LABEL: define internal void @_Z9nothrow_ei.cfi_jt -; CHECK-SAME: () #[[ATTR4:[0-9]+]] align 8 { +; CHECK-SAME: () #[[ATTR5:[0-9]+]] align 8 { ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(ptr nonnull @_Z9nothrow_ei) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: tail call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(ptr nonnull @_Z9nothrow_ei) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll index 9206893cb2341e..c133852f66937d 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll @@ -7,7 +7,7 @@ target triple = "aarch64" ; Check that the function gets vectorized. define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { -; CHECK-LABEL: define i32 @quant_4x4 +; CHECK-LABEL: define range(i32 0, 2) i32 @quant_4x4 ; CHECK-SAME: (ptr nocapture noundef [[DCT:%.*]], ptr nocapture noundef readonly [[MF:%.*]], ptr nocapture noundef readonly [[BIAS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DCT]], i64 32 diff --git a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll index 67d721b23d6f00..35d5ceeb91950f 100644 --- a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll +++ b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll @@ -2,7 +2,7 @@ ; RUN: opt -O1 -S < %s | FileCheck %s define i32 @testa(i32 %mul) { -; CHECK-LABEL: define i32 @testa( +; CHECK-LABEL: define range(i32 -65536, 65536) i32 @testa( ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: 
[[SHR:%.*]] = ashr i32 [[MUL]], 15 ; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767) @@ -16,7 +16,7 @@ define i32 @testa(i32 %mul) { } define i32 @testb(i32 %mul) { -; CHECK-LABEL: define i32 @testb( +; CHECK-LABEL: define range(i32 -16777216, 16777216) i32 @testb( ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[SHR102:%.*]] = ashr i32 [[MUL]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128) diff --git a/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll index fb338a6507eba8..63cfef6f3d09ae 100644 --- a/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll +++ b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll @@ -19,7 +19,7 @@ ;; } define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) { -; CHECK-LABEL: define i16 @vecreduce_smin_v2i16( +; CHECK-LABEL: define range(i16 -32768, 1) i16 @vecreduce_smin_v2i16( ; CHECK: @llvm.smin.v2i16 entry: @@ -65,7 +65,7 @@ for.end: ; preds = %for.cond } define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) { -; CHECK-LABEL: define i16 @vecreduce_smax_v2i16( +; CHECK-LABEL: define range(i16 0, -32768) i16 @vecreduce_smax_v2i16( ; CHECK: @llvm.smax.v2i16 entry: diff --git a/llvm/test/Transforms/SCCP/and-add-shl.ll b/llvm/test/Transforms/SCCP/and-add-shl.ll index 7c037ffa6bf640..7af563f13a18ab 100644 --- a/llvm/test/Transforms/SCCP/and-add-shl.ll +++ b/llvm/test/Transforms/SCCP/and-add-shl.ll @@ -59,7 +59,7 @@ define i8 @and_not_shl_1(i8 %x) { ; Negative test: https://alive2.llvm.org/ce/z/Zv4Pyu define i8 @and_add_shl_overlap(i8 %x) { -; CHECK-LABEL: define i8 @and_add_shl_overlap +; CHECK-LABEL: define range(i8 0, 33) i8 @and_add_shl_overlap ; CHECK-SAME: (i8 [[X:%.*]]) { ; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ule i8 [[X]], 6 ; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]]) @@ -77,7 +77,7 @@ define i8 @and_add_shl_overlap(i8 %x) { } define i8 @and_not_shl_overlap(i8 %x) { -; 
CHECK-LABEL: define i8 @and_not_shl_overlap +; CHECK-LABEL: define range(i8 0, 5) i8 @and_not_shl_overlap ; CHECK-SAME: (i8 [[X:%.*]]) { ; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ule i8 [[X]], 3 ; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]]) diff --git a/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll b/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll index 64c1b9020a054f..c24c554102ddf8 100644 --- a/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll +++ b/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll @@ -1,20 +1,21 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=ipsccp -S %s | FileCheck %s ; Test 1. ; Both arguments and return value of @callee can be tracked. The inferred range ; can be added to call sites. define internal i32 @callee(i32 %x) { -; CHECK-LABEL: @callee( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-LABEL: define internal range(i32 0, 21) i32 @callee( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: ret i32 [[X]] ; ret i32 %x } define i32 @caller1() { -; CHECK-LABEL: @caller1( -; CHECK-NEXT: [[C1:%.*]] = call i32 @callee(i32 10), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[C2:%.*]] = call i32 @callee(i32 20), !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 41) i32 @caller1() { +; CHECK-NEXT: [[C1:%.*]] = call i32 @callee(i32 10) +; CHECK-NEXT: [[C2:%.*]] = call i32 @callee(i32 20) ; CHECK-NEXT: [[A:%.*]] = add nuw nsw i32 [[C1]], [[C2]] ; CHECK-NEXT: ret i32 [[A]] ; @@ -25,9 +26,10 @@ define i32 @caller1() { } define i32 @caller2(i32 %x) { -; CHECK-LABEL: @caller2( -; CHECK-NEXT: [[X_15:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = call i32 @callee(i32 [[X_15]]), !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 21) i32 @caller2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[X_15:%.*]] = and i32 [[X]], 15 +; CHECK-NEXT: [[C:%.*]] = call i32 @callee(i32 [[X_15]]) ; CHECK-NEXT: ret i32 [[C]] 
; %x.15 = and i32 %x, 15 @@ -43,14 +45,15 @@ define i32 @caller2(i32 %x) { declare void @use_cb1(ptr) define internal i32 @callee2(i32 %x) { -; CHECK-LABEL: @callee2( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-LABEL: define internal i32 @callee2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: ret i32 [[X]] ; ret i32 %x } define void @caller_cb1() { -; CHECK-LABEL: @caller_cb1( +; CHECK-LABEL: define void @caller_cb1() { ; CHECK-NEXT: [[C1:%.*]] = call i32 @callee2(i32 9) ; CHECK-NEXT: [[C2:%.*]] = call i32 @callee2(i32 10) ; CHECK-NEXT: call void @use_cb1(ptr @callee2) @@ -70,8 +73,9 @@ define void @caller_cb1() { declare void @use_cb2(ptr) define internal i32 @callee3(i32 %x) { -; CHECK-LABEL: @callee3( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], 10 +; CHECK-LABEL: define internal range(i32 500, 601) i32 @callee3( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 ; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 500, i32 600 ; CHECK-NEXT: ret i32 [[S]] ; @@ -81,9 +85,9 @@ define internal i32 @callee3(i32 %x) { } define void @caller_cb2() { -; CHECK-LABEL: @caller_cb2( -; CHECK-NEXT: [[C1:%.*]] = call i32 @callee3(i32 9), !range [[RNG1:![0-9]+]] -; CHECK-NEXT: [[C2:%.*]] = call i32 @callee3(i32 10), !range [[RNG1]] +; CHECK-LABEL: define void @caller_cb2() { +; CHECK-NEXT: [[C1:%.*]] = call i32 @callee3(i32 9) +; CHECK-NEXT: [[C2:%.*]] = call i32 @callee3(i32 10) ; CHECK-NEXT: call void @use_cb2(ptr @callee3) ; CHECK-NEXT: ret void ; @@ -100,9 +104,10 @@ define void @caller_cb2() { declare void @use_cb3(ptr) define internal i32 @callee4(i32 %x, i32 %y) { -; CHECK-LABEL: @callee4( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], 10 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 500, i32 [[Y:%.*]] +; CHECK-LABEL: define internal i32 @callee4( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 500, i32 [[Y]] ; CHECK-NEXT: ret i32 [[S]] ; %c = 
icmp eq i32 %x, 10 @@ -111,11 +116,9 @@ define internal i32 @callee4(i32 %x, i32 %y) { } define void @caller_cb3() { -; CHECK-LABEL: @caller_cb3( +; CHECK-LABEL: define void @caller_cb3() { ; CHECK-NEXT: [[C1:%.*]] = call i32 @callee4(i32 11, i32 30) -; CHECK-NOT: !range ; CHECK-NEXT: [[C2:%.*]] = call i32 @callee4(i32 12, i32 40) -; CHECK-NOT: !range ; CHECK-NEXT: call void @use_cb3(ptr @callee4) ; CHECK-NEXT: ret void ; @@ -129,15 +132,16 @@ define void @caller_cb3() { ; Range for the return value of callee5 includes undef. No range metadata ; should be added at call sites. define internal i32 @callee5(i32 %x, i32 %y) { -; CHECK-LABEL: @callee5( -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], 15 +; CHECK-LABEL: define internal i32 @callee5( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X]], 15 ; CHECK-NEXT: br i1 [[C]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[Y:%.*]], [[BB1]] ], [ undef, [[BB2]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[Y]], [[BB1]] ], [ undef, [[BB2]] ] ; CHECK-NEXT: ret i32 [[RES]] ; %c = icmp slt i32 %x, 15 @@ -155,11 +159,9 @@ exit: } define i32 @caller5() { -; CHECK-LABEL: @caller5( +; CHECK-LABEL: define range(i32 200, 401) i32 @caller5() { ; CHECK-NEXT: [[C1:%.*]] = call i32 @callee5(i32 10, i32 100) -; CHECK-NOT: !range ; CHECK-NEXT: [[C2:%.*]] = call i32 @callee5(i32 20, i32 200) -; CHECK-NOT: !range ; CHECK-NEXT: [[A:%.*]] = add i32 [[C1]], [[C2]] ; CHECK-NEXT: ret i32 [[A]] ; @@ -170,8 +172,9 @@ define i32 @caller5() { } define internal <2 x i64> @ctlz(<2 x i64> %arg) { -; CHECK-LABEL: @ctlz( -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[ARG:%.*]], i1 false) +; CHECK-LABEL: define internal range(i64 0, 65) <2 x i64> @ctlz( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> 
@llvm.ctlz.v2i64(<2 x i64> [[ARG]], i1 false) ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %arg, i1 false) @@ -179,8 +182,9 @@ define internal <2 x i64> @ctlz(<2 x i64> %arg) { } define <2 x i64> @ctlz_caller(<2 x i64> %arg) { -; CHECK-LABEL: @ctlz_caller( -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @ctlz(<2 x i64> [[ARG:%.*]]), !range [[RNG2:![0-9]+]] +; CHECK-LABEL: define range(i64 0, 65) <2 x i64> @ctlz_caller( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @ctlz(<2 x i64> [[ARG]]) ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %res = call <2 x i64> @ctlz(<2 x i64> %arg) @@ -189,6 +193,3 @@ define <2 x i64> @ctlz_caller(<2 x i64> %arg) { declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) -; CHECK: [[RNG0]] = !{i32 0, i32 21} -; CHECK: [[RNG1]] = !{i32 500, i32 601} -; CHECK: [[RNG2]] = !{i64 0, i64 65} diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll index 80d90922c2fbdb..05fa04a9fbe06f 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -1,10 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=ipsccp -S | FileCheck %s ; x = [100, 301) define internal i1 @f.trunc(i32 %x) { -; CHECK-LABEL: @f.trunc( -; CHECK-NEXT: [[T_1:%.*]] = trunc nuw nsw i32 [[X:%.*]] to i16 +; CHECK-LABEL: define internal i1 @f.trunc( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[T_1:%.*]] = trunc nuw nsw i32 [[X]] to i16 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i16 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i16 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] @@ -43,7 +44,7 @@ define internal i1 @f.trunc(i32 %x) { } define i1 @caller1() { -; CHECK-LABEL: @caller1( +; CHECK-LABEL: define i1 @caller1() { ; CHECK-NEXT: 
[[CALL_1:%.*]] = tail call i1 @f.trunc(i32 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.trunc(i32 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -58,14 +59,15 @@ define i1 @caller1() { ; x = [100, 301) define internal i1 @f.zext(i32 %x, i32 %y) { -; CHECK-LABEL: @f.zext( -; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X:%.*]] to i64 +; CHECK-LABEL: define internal i1 @f.zext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X]] to i64 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] ; CHECK-NEXT: [[RES_2:%.*]] = add nuw nsw i1 [[RES_1]], false ; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] -; CHECK-NEXT: [[T_2:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[T_2:%.*]] = zext i32 [[Y]] to i64 ; CHECK-NEXT: [[C_5:%.*]] = icmp sgt i64 [[T_2]], 300 ; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 299 ; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], 1 @@ -97,7 +99,7 @@ define internal i1 @f.zext(i32 %x, i32 %y) { } define i1 @caller.zext() { -; CHECK-LABEL: @caller.zext( +; CHECK-LABEL: define i1 @caller.zext() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.zext(i32 100, i32 -120) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.zext(i32 300, i32 900) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -111,14 +113,15 @@ define i1 @caller.zext() { ; x = [100, 301) define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-LABEL: @f.sext( -; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X:%.*]] to i64 +; CHECK-LABEL: define internal i1 @f.sext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X]] to i64 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] ; CHECK-NEXT: [[RES_2:%.*]] = add nuw nsw i1 [[RES_1]], false ; 
CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] -; CHECK-NEXT: [[T_2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[T_2:%.*]] = sext i32 [[Y]] to i64 ; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 899 ; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], -119 ; CHECK-NEXT: [[RES_4:%.*]] = add nuw nsw i1 [[RES_3]], false @@ -148,7 +151,7 @@ define internal i1 @f.sext(i32 %x, i32 %y) { } define i1 @caller.sext() { -; CHECK-LABEL: @caller.sext( +; CHECK-LABEL: define i1 @caller.sext() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.sext(i32 100, i32 -120) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.sext(i32 300, i32 900) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -162,8 +165,9 @@ define i1 @caller.sext() { ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.fptosi(i32 %x) { -; CHECK-LABEL: @f.fptosi( -; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X:%.*]] to double +; CHECK-LABEL: define internal i1 @f.fptosi( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X]] to double ; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[TO_DOUBLE]] ; CHECK-NEXT: [[TO_I32:%.*]] = fptosi double [[ADD]] to i32 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i32 [[TO_I32]], 300 @@ -189,7 +193,7 @@ define internal i1 @f.fptosi(i32 %x) { } define i1 @caller.fptosi() { -; CHECK-LABEL: @caller.fptosi( +; CHECK-LABEL: define i1 @caller.fptosi() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fptosi(i32 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fptosi(i32 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -203,8 +207,9 @@ define i1 @caller.fptosi() { ; There's nothing we can do besides going to the full range or overdefined. 
define internal i1 @f.fpext(i16 %x) { -; CHECK-LABEL: @f.fpext( -; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X:%.*]] to float +; CHECK-LABEL: define internal i1 @f.fpext( +; CHECK-SAME: i16 [[X:%.*]]) { +; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X]] to float ; CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[TO_FLOAT]] to double ; CHECK-NEXT: [[TO_I64:%.*]] = fptoui float [[TO_FLOAT]] to i64 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 @@ -231,7 +236,7 @@ define internal i1 @f.fpext(i16 %x) { ; There's nothing we can do besides going to the full range or overdefined. define i1 @caller.fpext() { -; CHECK-LABEL: @caller.fpext( +; CHECK-LABEL: define i1 @caller.fpext() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fpext(i16 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fpext(i16 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -245,8 +250,9 @@ define i1 @caller.fpext() { ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.inttoptr.ptrtoint(i64 %x) { -; CHECK-LABEL: @f.inttoptr.ptrtoint( -; CHECK-NEXT: [[TO_PTR:%.*]] = inttoptr i64 [[X:%.*]] to ptr +; CHECK-LABEL: define internal i1 @f.inttoptr.ptrtoint( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[TO_PTR:%.*]] = inttoptr i64 [[X]] to ptr ; CHECK-NEXT: [[TO_I64:%.*]] = ptrtoint ptr [[TO_PTR]] to i64 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TO_I64]], 299 @@ -270,7 +276,7 @@ define internal i1 @f.inttoptr.ptrtoint(i64 %x) { } define i1 @caller.inttoptr.ptrtoint() { -; CHECK-LABEL: @caller.inttoptr.ptrtoint( +; CHECK-LABEL: define i1 @caller.inttoptr.ptrtoint() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -284,8 +290,9 @@ define i1 @caller.inttoptr.ptrtoint() { ; Make sure we do not create constant ranges for 
int to fp casts. define i1 @int_range_to_double_cast(i32 %a) { -; CHECK-LABEL: @int_range_to_double_cast( -; CHECK-NEXT: [[R:%.*]] = and i32 [[A:%.*]], 255 +; CHECK-LABEL: define i1 @int_range_to_double_cast( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], 255 ; CHECK-NEXT: [[T4:%.*]] = sitofp i32 [[R]] to double ; CHECK-NEXT: [[T10:%.*]] = fadd double 0.000000e+00, [[T4]] ; CHECK-NEXT: [[T11:%.*]] = fcmp olt double [[T4]], [[T10]] @@ -300,7 +307,7 @@ define i1 @int_range_to_double_cast(i32 %a) { ; Make sure we do not use ranges to propagate info from vectors. define i16 @vector_binop_and_cast() { -; CHECK-LABEL: @vector_binop_and_cast( +; CHECK-LABEL: define i16 @vector_binop_and_cast() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x i16> , i16 undef, i32 0 ; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> , [[VECINIT7]] @@ -317,8 +324,9 @@ entry: } define internal i64 @f.sext_to_zext(i32 %t) { -; CHECK-LABEL: @f.sext_to_zext( -; CHECK-NEXT: [[A:%.*]] = zext nneg i32 [[T:%.*]] to i64 +; CHECK-LABEL: define internal range(i64 0, 2) i64 @f.sext_to_zext( +; CHECK-SAME: i32 [[T:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[A]] ; %a = sext i32 %t to i64 @@ -326,10 +334,11 @@ define internal i64 @f.sext_to_zext(i32 %t) { } define i64 @caller.sext_to_zext(i32 %i) { -; CHECK-LABEL: @caller.sext_to_zext( -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[I:%.*]], 9 +; CHECK-LABEL: define range(i64 0, 2) i64 @caller.sext_to_zext( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[I]], 9 ; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: [[T:%.*]] = call i64 @f.sext_to_zext(i32 [[CONV]]), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[T:%.*]] = call i64 @f.sext_to_zext(i32 [[CONV]]) ; CHECK-NEXT: ret i64 [[T]] ; %cmp = icmp sle i32 %i, 9 diff --git a/llvm/test/Transforms/SCCP/ipsccp-basic.ll b/llvm/test/Transforms/SCCP/ipsccp-basic.ll index 
71c042b9b29467..6a7ab8ac2864cd 100644 --- a/llvm/test/Transforms/SCCP/ipsccp-basic.ll +++ b/llvm/test/Transforms/SCCP/ipsccp-basic.ll @@ -71,7 +71,7 @@ define void @test3a() { } define i32 @test3b() { -; CHECK-LABEL: define i32 @test3b() { +; CHECK-LABEL: define range(i32 0, 18) i32 @test3b() { ; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V]], 17 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -105,7 +105,7 @@ define internal {i64,i64} @test4a() { } define i64 @test4b() personality ptr @__gxx_personality_v0 { -; CHECK-LABEL: define i64 @test4b() personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: define range(i64 0, 6) i64 @test4b() personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: [[A:%.*]] = invoke { i64, i64 } @test4a() ; CHECK-NEXT: to label [[A:%.*]] unwind label [[B:%.*]] ; CHECK: A: @@ -149,7 +149,7 @@ define internal {i64,i64} @test5a() { } define i64 @test5b() personality ptr @__gxx_personality_v0 { -; CHECK-LABEL: define i64 @test5b() personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: define range(i64 0, 6) i64 @test5b() personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: [[A:%.*]] = invoke { i64, i64 } @test5a() ; CHECK-NEXT: to label [[A:%.*]] unwind label [[B:%.*]] ; CHECK: A: diff --git a/llvm/test/Transforms/SCCP/switch.ll b/llvm/test/Transforms/SCCP/switch.ll index 306f0eebf2b408..5208213de210c1 100644 --- a/llvm/test/Transforms/SCCP/switch.ll +++ b/llvm/test/Transforms/SCCP/switch.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -S -passes=ipsccp < %s | FileCheck %s ; Make sure we always consider the default edge executable for a switch @@ -7,7 +7,7 @@ declare void @foo() declare i32 @g(i32) define void @test1() { -; CHECK-LABEL: @test1( +; CHECK-LABEL: define void @test1() { ; CHECK-NEXT: switch i32 undef, label [[D:%.*]] 
[ ; CHECK-NEXT: ] ; CHECK: d: @@ -21,15 +21,16 @@ d: } define i32 @test_duplicate_successors_phi(i1 %c, i32 %x) { -; CHECK-LABEL: @test_duplicate_successors_phi( +; CHECK-LABEL: define i32 @test_duplicate_successors_phi( +; CHECK-SAME: i1 [[C:%.*]], i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[SWITCH:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[SWITCH:%.*]], label [[END:%.*]] ; CHECK: switch: ; CHECK-NEXT: br label [[SWITCH_DEFAULT:%.*]] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 ; CHECK: end: -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-NEXT: ret i32 [[X]] ; entry: br i1 %c, label %switch, label %end @@ -49,13 +50,14 @@ end: } define i32 @test_duplicate_successors_phi_2(i1 %c, i32 %x) { -; CHECK-LABEL: @test_duplicate_successors_phi_2( +; CHECK-LABEL: define i32 @test_duplicate_successors_phi_2( +; CHECK-SAME: i1 [[C:%.*]], i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[SWITCH:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[SWITCH:%.*]], label [[END:%.*]] ; CHECK: switch: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ 1, [[SWITCH]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ 1, [[SWITCH]] ] ; CHECK-NEXT: ret i32 [[PHI]] ; entry: @@ -76,22 +78,23 @@ end: } define i32 @test_duplicate_successors_phi_3(i1 %c1, ptr %p, i32 %y) { -; CHECK-LABEL: @test_duplicate_successors_phi_3( +; CHECK-LABEL: define i32 @test_duplicate_successors_phi_3( +; CHECK-SAME: i1 [[C1:%.*]], ptr [[P:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C1:%.*]], label [[SWITCH:%.*]], label [[SWITCH_1:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[SWITCH:%.*]], label [[SWITCH_1:%.*]] ; CHECK: switch: -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: switch i32 [[X]], label 
[[SWITCH_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[SWITCH_DEFAULT]] -; CHECK-NEXT: i32 1, label [[SWITCH_0:%.*]] -; CHECK-NEXT: i32 2, label [[SWITCH_0]] +; CHECK-NEXT: i32 0, label [[SWITCH_DEFAULT]] +; CHECK-NEXT: i32 1, label [[SWITCH_0:%.*]] +; CHECK-NEXT: i32 2, label [[SWITCH_0]] ; CHECK-NEXT: ] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 ; CHECK: switch.0: ; CHECK-NEXT: ret i32 0 ; CHECK: switch.1: -; CHECK-NEXT: ret i32 [[Y:%.*]] +; CHECK-NEXT: ret i32 [[Y]] ; entry: br i1 %c1, label %switch, label %switch.1 @@ -118,12 +121,13 @@ switch.1: } define i32 @test_local_range(ptr %p) { -; CHECK-LABEL: @test_local_range( -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 3) i32 @test_local_range( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ -; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] -; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] -; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] +; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] +; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] +; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] ; CHECK-NEXT: ] ; CHECK: default.unreachable: ; CHECK-NEXT: unreachable @@ -160,13 +164,14 @@ switch.3: ; TODO: Determine that case i3 is dead, even though the edge is shared? 
define i32 @test_duplicate_successors(ptr %p) { -; CHECK-LABEL: @test_duplicate_successors( -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 2) i32 @test_duplicate_successors( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ -; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] -; CHECK-NEXT: i32 1, label [[SWITCH_0]] -; CHECK-NEXT: i32 2, label [[SWITCH_1:%.*]] -; CHECK-NEXT: i32 3, label [[SWITCH_1]] +; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] +; CHECK-NEXT: i32 1, label [[SWITCH_0]] +; CHECK-NEXT: i32 2, label [[SWITCH_1:%.*]] +; CHECK-NEXT: i32 3, label [[SWITCH_1]] ; CHECK-NEXT: ] ; CHECK: default.unreachable: ; CHECK-NEXT: unreachable @@ -201,11 +206,12 @@ switch.2: ; Case i32 2 is dead as well, but this cannot be determined based on ; range information. define internal i32 @test_ip_range(i32 %x) { -; CHECK-LABEL: @test_ip_range( -; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_UNREACHABLE:%.*]] [ -; CHECK-NEXT: i32 3, label [[SWITCH_3:%.*]] -; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] -; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] +; CHECK-LABEL: define internal range(i32 1, 4) i32 @test_ip_range( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ +; CHECK-NEXT: i32 3, label [[SWITCH_3:%.*]] +; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] +; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] ; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]] ; CHECK: default.unreachable: ; CHECK-NEXT: unreachable @@ -240,9 +246,9 @@ switch.3: } define void @call_test_ip_range() { -; CHECK-LABEL: @call_test_ip_range( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @test_ip_range(i32 1), !range [[RNG2:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @test_ip_range(i32 3), !range [[RNG2]] +; CHECK-LABEL: define void @call_test_ip_range() { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 
@test_ip_range(i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @test_ip_range(i32 3) ; CHECK-NEXT: ret void ; call i32 @test_ip_range(i32 1) @@ -251,11 +257,12 @@ define void @call_test_ip_range() { } define i32 @test_switch_range_may_include_undef(i1 %c.1, i1 %c.2, i32 %x) { -; CHECK-LABEL: @test_switch_range_may_include_undef( +; CHECK-LABEL: define range(i32 -1, 21) i32 @test_switch_range_may_include_undef( +; CHECK-SAME: i1 [[C_1:%.*]], i1 [[C_2:%.*]], i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]] ; CHECK: then.1: -; CHECK-NEXT: br i1 [[C_2:%.*]], label [[SWITCH:%.*]], label [[ELSE_2:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[SWITCH:%.*]], label [[ELSE_2:%.*]] ; CHECK: else.1: ; CHECK-NEXT: br label [[SWITCH]] ; CHECK: else.2: @@ -263,8 +270,8 @@ define i32 @test_switch_range_may_include_undef(i1 %c.1, i1 %c.2, i32 %x) { ; CHECK: switch: ; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[THEN_1]] ], [ 2, [[ELSE_1]] ], [ undef, [[ELSE_2]] ] ; CHECK-NEXT: switch i32 [[P]], label [[SWITCH_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[END_1:%.*]] -; CHECK-NEXT: i32 3, label [[END_2:%.*]] +; CHECK-NEXT: i32 0, label [[END_1:%.*]] +; CHECK-NEXT: i32 3, label [[END_2:%.*]] ; CHECK-NEXT: ] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 @@ -303,9 +310,10 @@ end.2: } define i32 @test_default_unreachable_by_dom_cond(i32 %x) { -; CHECK-LABEL: @test_default_unreachable_by_dom_cond( +; CHECK-LABEL: define i32 @test_default_unreachable_by_dom_cond( +; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 4 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X]], 4 ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ @@ -371,4 +379,7 @@ return: declare void @llvm.assume(i1) -; CHECK: !1 = 
!{!"branch_weights", i32 1, i32 5, i32 3, i32 4} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 3} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 5, i32 3, i32 4} +;. diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll index fc3e56011d46cd..d3bac0d68a979f 100644 --- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll @@ -16,7 +16,7 @@ entry: } define i8 @range_from_or_nsw(i16 %a) { -; CHECK-LABEL: define i8 @range_from_or_nsw( +; CHECK-LABEL: define range(i8 -128, 0) i8 @range_from_or_nsw( ; CHECK-SAME: i16 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND1:%.*]] = or i16 [[A]], -128 @@ -30,7 +30,7 @@ entry: } define i16 @range_from_and_nuw_nsw(i32 %a) { -; CHECK-LABEL: define i16 @range_from_and_nuw_nsw( +; CHECK-LABEL: define range(i16 0, -32768) i16 @range_from_and_nuw_nsw( ; CHECK-SAME: i32 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], 32767 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll new file mode 100644 index 00000000000000..5ec6b4f1040d81 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s + +define i32 @test(ptr %f, i16 %0) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[F:%.*]], i16 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[F]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> , i16 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> , i16 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 
x i16> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[ZEXT_4:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: ret i32 [[ZEXT_4]] +; +entry: + %1 = load i16, ptr %f, align 2 + + %zext.0 = zext i16 %1 to i32 + %sext.0 = sext i16 %0 to i32 + + %zext.1 = zext i16 0 to i32 + %sext.1 = sext i16 0 to i32 + %zext.2 = zext i16 0 to i32 + %sext.2 = sext i16 0 to i32 + %zext.3 = zext i16 0 to i32 + %sext.3 = sext i16 0 to i32 + + %cmp.0 = icmp ule i32 %zext.0, %sext.0 + %cmp.1 = icmp ule i32 %zext.1, %sext.1 + %cmp.2 = icmp ule i32 %zext.2, %sext.2 + %cmp.3 = icmp ule i32 %zext.3, %sext.3 + + %and.0 = and i1 %cmp.0, %cmp.1 + %and.1 = and i1 %and.0, %cmp.2 + %and.2 = and i1 %and.1, %cmp.3 + + %zext.4 = zext i1 %and.2 to i32 + + ret i32 %zext.4 +} diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll index 757340527ec030..ef2d3219cca9b6 100644 --- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll +++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll @@ -627,7 +627,233 @@ else: ret void } +define i32 @test_assume_false(i32 %cond) { +; CHECK-LABEL: @test_assume_false( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + 
br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %bool = phi i1 [ false, %default ], [ true, %case0 ], [ true, %case1 ], [ true, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 %bool) + ret i32 %res +} + +define i32 @test_assume_undef(i32 %cond) { +; CHECK-LABEL: @test_assume_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %bool = phi i1 [ undef, %default ], [ true, %case0 ], [ true, %case1 ], [ true, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 %bool) + ret i32 %res +} + +define i32 @test_assume_var(i32 %cond, i1 %var) { +; CHECK-LABEL: @test_assume_var( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[BOOL:%.*]] = phi i1 [ [[VAR:%.*]], [[DEFAULT]] ], [ true, [[CASE1]] 
], [ true, [[CASE2]] ], [ true, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[DEFAULT]] ], [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 [[BOOL]]) +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %bool = phi i1 [ %var, %default ], [ true, %case0 ], [ true, %case1 ], [ true, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 %bool) + ret i32 %res +} + +define i32 @test_assume_bundle_nonnull(i32 %cond, ptr nonnull %p) { +; CHECK-LABEL: @test_assume_bundle_nonnull( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ null, [[DEFAULT]] ], [ [[P:%.*]], [[CASE1]] ], [ [[P]], [[CASE2]] ], [ [[P]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[DEFAULT]] ], [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR]]) ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %ptr = phi ptr [ null, %default ], [ %p, %case0 ], [ %p, %case1 ], [ %p, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 true) [ 
"nonnull"(ptr %ptr) ] + ret i32 %res +} + +define i32 @test_assume_bundle_align(i32 %cond, ptr nonnull %p) { +; CHECK-LABEL: @test_assume_bundle_align( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ null, [[DEFAULT]] ], [ [[P:%.*]], [[CASE1]] ], [ [[P]], [[CASE2]] ], [ [[P]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[DEFAULT]] ], [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i32 8) ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %ptr = phi ptr [ null, %default ], [ %p, %case0 ], [ %p, %case1 ], [ %p, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i32 8) ] + ret i32 %res +} attributes #0 = { null_pointer_is_valid } ;. 
diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index b8a33f74bd570f..5503f7343cb672 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -434,8 +434,10 @@ ld_plugin_status onload(ld_plugin_tv *tv) { // FIXME: When binutils 2.31 (containing gold 1.16) is the minimum // required version, this should be changed to: // get_wrap_symbols = tv->tv_u.tv_get_wrap_symbols; - get_wrap_symbols = - (ld_plugin_get_wrap_symbols)tv->tv_u.tv_message; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" + get_wrap_symbols = (ld_plugin_get_wrap_symbols)tv->tv_u.tv_message; +#pragma GCC diagnostic pop break; default: break; diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 80064ed9848517..1f183975d9481f 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -133,9 +133,7 @@ getInterfaceFile(const StringRef Filename, bool ResetBanner = true) { std::unique_ptr IF; switch (identify_magic(Buffer->getBuffer())) { case file_magic::macho_dynamically_linked_shared_lib: - LLVM_FALLTHROUGH; case file_magic::macho_dynamically_linked_shared_lib_stub: - LLVM_FALLTHROUGH; case file_magic::macho_universal_binary: IF = ExitOnErr(DylibReader::get(Buffer->getMemBufferRef())); break; diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index 3fa4b2cf73b6be..dddd2f73d4446b 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -81,6 +81,7 @@ TEST_F(IntrinsicsTest, InstrProfInheritance) { __ISA(InstrProfCoverInst, InstrProfCntrInstBase); __ISA(InstrProfIncrementInst, InstrProfCntrInstBase); __ISA(InstrProfIncrementInstStep, InstrProfIncrementInst); + __ISA(InstrProfCallsite, InstrProfCntrInstBase); __ISA(InstrProfTimestampInst, InstrProfCntrInstBase); __ISA(InstrProfValueProfileInst, InstrProfCntrInstBase); 
__ISA(InstrProfMCDCBitmapInstBase, InstrProfInstBase); @@ -94,6 +95,7 @@ TEST_F(IntrinsicsTest, InstrProfInheritance) { {Intrinsic::instrprof_cover, isInstrProfCoverInst}, {Intrinsic::instrprof_increment, isInstrProfIncrementInst}, {Intrinsic::instrprof_increment_step, isInstrProfIncrementInstStep}, + {Intrinsic::instrprof_callsite, isInstrProfCallsite}, {Intrinsic::instrprof_mcdc_condbitmap_update, isInstrProfMCDCCondBitmapUpdate}, {Intrinsic::instrprof_mcdc_parameters, diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 7e00a80cacf933..98dacd3511e1d8 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -122,14 +122,6 @@ MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") { return false; } -MemProfSchema getFullSchema() { - MemProfSchema Schema; -#define MIBEntryDef(NameTag, Name, Type) Schema.push_back(Meta::Name); -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - return Schema; -} - TEST(MemProf, FillsValue) { std::unique_ptr Symbolizer(new MockSymbolizer()); @@ -187,7 +179,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for foo. const llvm::GlobalValue::GUID FooId = IndexedMemProfRecord::getGUID("foo"); - ASSERT_EQ(Records.count(FooId), 1U); + ASSERT_TRUE(Records.contains(FooId)); const MemProfRecord &Foo = Records[FooId]; ASSERT_THAT(Foo.AllocSites, SizeIs(1)); EXPECT_EQ(Foo.AllocSites[0].Info.getAllocCount(), 1U); @@ -203,7 +195,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for bar. const llvm::GlobalValue::GUID BarId = IndexedMemProfRecord::getGUID("bar"); - ASSERT_EQ(Records.count(BarId), 1U); + ASSERT_TRUE(Records.contains(BarId)); const MemProfRecord &Bar = Records[BarId]; ASSERT_THAT(Bar.AllocSites, SizeIs(1)); EXPECT_EQ(Bar.AllocSites[0].Info.getAllocCount(), 1U); @@ -223,7 +215,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for xyz. 
const llvm::GlobalValue::GUID XyzId = IndexedMemProfRecord::getGUID("xyz"); - ASSERT_EQ(Records.count(XyzId), 1U); + ASSERT_TRUE(Records.contains(XyzId)); const MemProfRecord &Xyz = Records[XyzId]; ASSERT_THAT(Xyz.CallSites, SizeIs(1)); ASSERT_THAT(Xyz.CallSites[0], SizeIs(2)); @@ -234,7 +226,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for abc. const llvm::GlobalValue::GUID AbcId = IndexedMemProfRecord::getGUID("abc"); - ASSERT_EQ(Records.count(AbcId), 1U); + ASSERT_TRUE(Records.contains(AbcId)); const MemProfRecord &Abc = Records[AbcId]; EXPECT_TRUE(Abc.AllocSites.empty()); ASSERT_THAT(Abc.CallSites, SizeIs(1)); @@ -248,7 +240,7 @@ TEST(MemProf, PortableWrapper) { /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, /*dealloc_cpu=*/4); - const auto Schema = getFullSchema(); + const auto Schema = llvm::memprof::getFullSchema(); PortableMemInfoBlock WriteBlock(Info); std::string Buffer; @@ -271,7 +263,7 @@ TEST(MemProf, PortableWrapper) { // Version0 and Version1 serialize IndexedMemProfRecord in the same format, so // we share one test. 
TEST(MemProf, RecordSerializationRoundTripVersion0And1) { - const MemProfSchema Schema = getFullSchema(); + const auto Schema = llvm::memprof::getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, @@ -305,7 +297,7 @@ TEST(MemProf, RecordSerializationRoundTripVersion0And1) { } TEST(MemProf, RecordSerializationRoundTripVerion2) { - const MemProfSchema Schema = getFullSchema(); + const auto Schema = llvm::memprof::getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis index 0fc50f79466a45..d83ae8013c51e2 100644 --- a/llvm/utils/LLVMVisualizers/llvm.natvis +++ b/llvm/utils/LLVMVisualizers/llvm.natvis @@ -92,11 +92,11 @@ For later versions of Visual Studio, no setup is required. {($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask)} - {($T3)((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)} - {$T5::IntMask}: {($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask)} [{($T3)((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)}] + {((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)} + {$T5::IntMask}: {($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask)} [{((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)}] ($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask) - ($T3)((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask) + ((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 7a5d2be3ae95b2..88d353e89a4614 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -4246,7 +4246,7 @@ static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) { void CodeGenDAGPatterns::ParseOnePattern( Record 
*TheDef, TreePattern &Pattern, TreePattern &Result, - const std::vector &InstImpResults) { + const std::vector &InstImpResults, bool ShouldIgnore) { // Inline pattern fragments and expand multiple alternatives. Pattern.InlinePatternFragments(); @@ -4332,7 +4332,7 @@ void CodeGenDAGPatterns::ParseOnePattern( AddPatternToMatch(&Pattern, PatternToMatch(TheDef, Preds, T, Temp.getOnlyTree(), InstImpResults, Complexity, - TheDef->getID())); + TheDef->getID(), ShouldIgnore)); } } else { // Show a message about a dropped pattern with some info to make it @@ -4378,7 +4378,8 @@ void CodeGenDAGPatterns::ParsePatterns() { FindPatternInputsAndOutputs(Pattern, Pattern.getTree(j), InstInputs, InstResults, InstImpResults); - ParseOnePattern(CurPattern, Pattern, Result, InstImpResults); + ParseOnePattern(CurPattern, Pattern, Result, InstImpResults, + CurPattern->getValueAsBit("GISelShouldIgnore")); } } @@ -4407,10 +4408,10 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() { return; } - PatternsToMatch.emplace_back(P.getSrcRecord(), P.getPredicates(), - std::move(NewSrc), std::move(NewDst), - P.getDstRegs(), P.getAddedComplexity(), - Record::getNewUID(Records), Check); + PatternsToMatch.emplace_back( + P.getSrcRecord(), P.getPredicates(), std::move(NewSrc), + std::move(NewDst), P.getDstRegs(), P.getAddedComplexity(), + Record::getNewUID(Records), P.getGISelShouldIgnore(), Check); }; for (PatternToMatch &P : Copy) { @@ -4781,6 +4782,7 @@ void CodeGenDAGPatterns::GenerateVariants() { Variant, PatternsToMatch[i].getDstPatternShared(), PatternsToMatch[i].getDstRegs(), PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(Records), + PatternsToMatch[i].getGISelShouldIgnore(), PatternsToMatch[i].getHwModeFeatures()); } diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h index 7fcd39a9e940cc..7f94db0b7d5d76 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h @@ 
-1057,17 +1057,19 @@ class PatternToMatch { TreePatternNodePtr DstPattern; // Resulting pattern. std::vector Dstregs; // Physical register defs being matched. std::string HwModeFeatures; - int AddedComplexity; // Add to matching pattern complexity. - unsigned ID; // Unique ID for the record. + int AddedComplexity; // Add to matching pattern complexity. + bool GISelShouldIgnore; // Should GlobalISel ignore importing this pattern. + unsigned ID; // Unique ID for the record. public: PatternToMatch(Record *srcrecord, ListInit *preds, TreePatternNodePtr src, TreePatternNodePtr dst, std::vector dstregs, - int complexity, unsigned uid, const Twine &hwmodefeatures = "") + int complexity, unsigned uid, bool ignore, + const Twine &hwmodefeatures = "") : SrcRecord(srcrecord), Predicates(preds), SrcPattern(src), DstPattern(dst), Dstregs(std::move(dstregs)), HwModeFeatures(hwmodefeatures.str()), AddedComplexity(complexity), - ID(uid) {} + GISelShouldIgnore(ignore), ID(uid) {} Record *getSrcRecord() const { return SrcRecord; } ListInit *getPredicates() const { return Predicates; } @@ -1078,6 +1080,7 @@ class PatternToMatch { const std::vector &getDstRegs() const { return Dstregs; } StringRef getHwModeFeatures() const { return HwModeFeatures; } int getAddedComplexity() const { return AddedComplexity; } + bool getGISelShouldIgnore() const { return GISelShouldIgnore; } unsigned getID() const { return ID; } std::string getPredicateCheck() const; @@ -1240,7 +1243,8 @@ class CodeGenDAGPatterns { void ParseOnePattern(Record *TheDef, TreePattern &Pattern, TreePattern &Result, - const std::vector &InstImpResults); + const std::vector &InstImpResults, + bool ShouldIgnore = false); void AddPatternToMatch(TreePattern *Pattern, PatternToMatch &&PTM); void FindPatternInputsAndOutputs( TreePattern &I, TreePatternNodePtr Pat, diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index f2504775d557f2..0439df8067ede8 100644 --- 
a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -97,7 +97,7 @@ static ParameterKind getParameterKind(const Record *R) { if (R->getValueAsInt("isHalfOrFloat") || R->getValueAsInt("isI16OrI32")) { return ParameterKind::Overload; } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: llvm_unreachable("Support for specified DXIL Type not yet implemented"); } @@ -272,7 +272,7 @@ static std::string getOverloadKindStr(const Record *R) { return "OverloadKind::I16 | OverloadKind::I32"; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: llvm_unreachable( "Support for specified parameter OverloadKind not yet implemented"); diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 25e302ce1ca46f..78abf80e7aecb3 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2411,6 +2411,8 @@ void GlobalISelEmitter::run(raw_ostream &OS) { for (const PatternToMatch &Pat : CGP.ptms()) { ++NumPatternTotal; + if (Pat.getGISelShouldIgnore()) + continue; // skip without warning auto MatcherOrErr = runOnPattern(Pat); // The pattern analysis can fail, indicating an unsupported pattern. diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp index 26034e31ad8d19..217b531dcfd394 100644 --- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This tablegen backend emits the include file needed by the target -// parser to parse the RISC-V CPUs. +// This tablegen backend emits the include file needed by RISCVTargetParser.cpp +// and RISCVISAInfo.cpp to parse the RISC-V CPUs and extensions. 
// //===----------------------------------------------------------------------===// @@ -17,6 +17,94 @@ using namespace llvm; +static StringRef getExtensionName(const Record *R) { + StringRef Name = R->getValueAsString("Name"); + Name.consume_front("experimental-"); + return Name; +} + +static void printExtensionTable(raw_ostream &OS, + const std::vector &Extensions, + bool Experimental) { + OS << "static const RISCVSupportedExtension Supported"; + if (Experimental) + OS << "Experimental"; + OS << "Extensions[] = {\n"; + + for (Record *R : Extensions) { + if (R->getValueAsBit("Experimental") != Experimental) + continue; + + OS << " {\"" << getExtensionName(R) << "\", {" + << R->getValueAsInt("MajorVersion") << ", " + << R->getValueAsInt("MinorVersion") << "}},\n"; + } + + OS << "};\n\n"; +} + +// Get the extension name from the Record name. This gives the canonical +// capitalization. +static StringRef getExtensionNameFromRecordName(const Record *R) { + StringRef Name = R->getName(); + if (!Name.consume_front("FeatureStdExt")) + Name.consume_front("FeatureVendor"); + + return Name; +} + +static void emitRISCVExtensions(RecordKeeper &Records, raw_ostream &OS) { + OS << "#ifdef GET_SUPPORTED_EXTENSIONS\n"; + OS << "#undef GET_SUPPORTED_EXTENSIONS\n\n"; + + std::vector Extensions = + Records.getAllDerivedDefinitions("RISCVExtension"); + llvm::sort(Extensions, [](const Record *Rec1, const Record *Rec2) { + return getExtensionName(Rec1) < getExtensionName(Rec2); + }); + + printExtensionTable(OS, Extensions, /*Experimental=*/false); + printExtensionTable(OS, Extensions, /*Experimental=*/true); + + OS << "#endif // GET_SUPPORTED_EXTENSIONS\n\n"; + + OS << "#ifdef GET_IMPLIED_EXTENSIONS\n"; + OS << "#undef GET_IMPLIED_EXTENSIONS\n\n"; + + for (Record *Ext : Extensions) { + auto ImpliesList = Ext->getValueAsListOfDefs("Implies"); + if (ImpliesList.empty()) + continue; + + OS << "static const char *ImpliedExts" + << getExtensionNameFromRecordName(Ext) << "[] = {"; + + 
ListSeparator LS(", "); + for (auto *ImpliedExt : ImpliesList) { + if (!ImpliedExt->isSubClassOf("RISCVExtension")) + continue; + + OS << LS << '"' << getExtensionName(ImpliedExt) << '"'; + } + + OS << "};\n"; + } + + OS << "\nstatic constexpr ImpliedExtsEntry ImpliedExts[] = {\n"; + for (Record *Ext : Extensions) { + auto ImpliesList = Ext->getValueAsListOfDefs("Implies"); + if (ImpliesList.empty()) + continue; + + OS << " { {\"" << getExtensionName(Ext) << "\"}, {ImpliedExts" + << getExtensionNameFromRecordName(Ext) << "} },\n"; + } + + OS << "};\n\n"; + + OS << "#endif // GET_IMPLIED_EXTENSIONS\n\n"; +} + // We can generate march string from target features as what has been described // in RISC-V ISA specification (version 20191213) 'Chapter 27. ISA Extension // Naming Conventions'. @@ -54,7 +142,7 @@ static void printMArch(raw_ostream &OS, const Record &Rec) { OS << LS << Ext.first << Ext.second.Major << 'p' << Ext.second.Minor; } -static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { +static void emitRISCVProcs(RecordKeeper &RK, raw_ostream &OS) { OS << "#ifndef PROC\n" << "#define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGNED_ACCESS)\n" << "#endif\n\n"; @@ -101,5 +189,11 @@ static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { OS << "\n#undef TUNE_PROC\n"; } +static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { + emitRISCVExtensions(RK, OS); + emitRISCVProcs(RK, OS); +} + static TableGen::Emitter::Opt X("gen-riscv-target-def", EmitRISCVTargetDef, - "Generate the list of CPU for RISCV"); + "Generate the list of CPUs and extensions for " + "RISC-V"); diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn index c9e081383fa026..0d27b786da1f63 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn @@ -25,6 +25,7 
@@ static_library("modernize") { "MakeSharedCheck.cpp", "MakeSmartPtrCheck.cpp", "MakeUniqueCheck.cpp", + "MinMaxUseInitializerListCheck.cpp", "ModernizeTidyModule.cpp", "PassByValueCheck.cpp", "RawStringLiteralCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 59dc38c8c4d8a8..815c5a93c72f75 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -35,6 +35,7 @@ static_library("readability") { "IsolateDeclarationCheck.cpp", "MagicNumbersCheck.cpp", "MakeMemberFunctionConstCheck.cpp", + "MathMissingParenthesesCheck.cpp", "MisleadingIndentationCheck.cpp", "MisplacedArrayIndexCheck.cpp", "NamedParameterCheck.cpp", diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h b/mlir/include/mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h new file mode 100644 index 00000000000000..6c3643f7835cbe --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h @@ -0,0 +1,21 @@ +//===- RuntimeOpVerification.h - Op Verification ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_RUNTIMEOPVERIFICATION_H +#define MLIR_DIALECT_LINALG_RUNTIMEOPVERIFICATION_H + +namespace mlir { +class DialectRegistry; + +namespace linalg { +void registerRuntimeVerifiableOpInterfaceExternalModels( + DialectRegistry ®istry); +} // namespace linalg +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_RUNTIMEOPVERIFICATION_H diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h index 8a57c6094c41c0..030be328e97fd0 100644 --- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h +++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h @@ -194,7 +194,7 @@ bool isLinearizableVector(VectorType type); /// for each dimension of the passed in tensor. Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, ArrayRef readShape, Value padValue, - bool useInBoundsInsteadOfMasking = true); + bool useInBoundsInsteadOfMasking); /// Returns success if `inputVectorSizes` is a valid masking configuraion for /// given `shape`, i.e., it meets: diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index c4d788cf8ed316..d9db21073e15c7 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -45,6 +45,7 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Transforms/AllInterfaces.h" +#include "mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h" #include "mlir/Dialect/MLProgram/IR/MLProgram.h" #include "mlir/Dialect/MLProgram/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/MPI/IR/MPI.h" @@ -161,6 +162,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { cf::registerBufferDeallocationOpInterfaceExternalModels(registry); 
gpu::registerBufferDeallocationOpInterfaceExternalModels(registry); linalg::registerAllDialectInterfaceImplementations(registry); + linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry); memref::registerAllocationOpInterfaceExternalModels(registry); memref::registerBufferViewFlowOpInterfaceExternalModels(registry); memref::registerRuntimeVerifiableOpInterfaceExternalModels(registry); diff --git a/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td b/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td index d5f11d00cc3d2a..6fd0df59d9d2e0 100644 --- a/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td +++ b/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td @@ -35,6 +35,12 @@ def RuntimeVerifiableOpInterface : OpInterface<"RuntimeVerifiableOpInterface"> { "::mlir::Location":$loc) >, ]; + + let extraClassDeclaration = [{ + /// Generate the error message that will be printed to the user when + /// verification fails. + static std::string generateErrorMessage(Operation *op, const std::string &msg); + }]; } #endif // MLIR_INTERFACES_RUNTIMEVERIFIABLEOPINTERFACE diff --git a/mlir/include/mlir/Tools/lsp-server-support/Transport.h b/mlir/include/mlir/Tools/lsp-server-support/Transport.h index 44c71058cf717c..ce742be7a941c9 100644 --- a/mlir/include/mlir/Tools/lsp-server-support/Transport.h +++ b/mlir/include/mlir/Tools/lsp-server-support/Transport.h @@ -147,15 +147,9 @@ class MessageHandler { void (ThisT::*handler)(const Param &)) { notificationHandlers[method] = [method, handler, thisPtr](llvm::json::Value rawParams) { - llvm::Expected param = - parse(rawParams, method, "notification"); - if (!param) { - return llvm::consumeError( - llvm::handleErrors(param.takeError(), [](const LSPError &lspError) { - Logger::error("JSON parsing error: {0}", - lspError.message.c_str()); - })); - } + llvm::Expected param = parse(rawParams, method, "request"); + if (!param) + return llvm::consumeError(param.takeError()); 
(thisPtr->*handler)(*param); }; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index ee6e391d0cc682..3b5282a09569d7 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -27,6 +27,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms NamedOpConversions.cpp Padding.cpp Promotion.cpp + RuntimeOpVerification.cpp Specialize.cpp Split.cpp SplitReduction.cpp @@ -60,6 +61,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms MLIRFuncDialect MLIRFuncToLLVM MLIRFuncTransforms + MLIRIndexDialect MLIRInferTypeOpInterface MLIRIR MLIRMemRefDialect diff --git a/mlir/lib/Dialect/Linalg/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/Linalg/Transforms/RuntimeOpVerification.cpp new file mode 100644 index 00000000000000..b30182dc84079f --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/RuntimeOpVerification.cpp @@ -0,0 +1,135 @@ +//===- RuntimeOpVerification.cpp - Op Verification ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h" + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Index/IR/IndexAttrs.h" +#include "mlir/Dialect/Index/IR/IndexDialect.h" +#include "mlir/Dialect/Index/IR/IndexOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Interfaces/RuntimeVerifiableOpInterface.h" + +namespace mlir { +namespace linalg { +namespace { +/// Verify that the runtime sizes of the operands to linalg structured ops are +/// compatible with the runtime sizes inferred by composing the loop ranges with +/// the linalg op's indexing maps. This is similar to the verifier except that +/// here we insert IR to perform the verification at runtime. 
+template +struct StructuredOpInterface + : public RuntimeVerifiableOpInterface::ExternalModel< + StructuredOpInterface, T> { + void generateRuntimeVerification(Operation *op, OpBuilder &builder, + Location loc) const { + auto linalgOp = llvm::cast(op); + + SmallVector loopRanges = linalgOp.createLoopRanges(builder, loc); + auto [starts, ends, _] = getOffsetsSizesAndStrides(loopRanges); + + auto zero = builder.create(loc, 0); + auto one = builder.create(loc, 1); + + // Subtract one from the loop ends before composing with the indexing map + transform(ends, ends.begin(), [&](OpFoldResult end) { + auto endValue = getValueOrCreateConstantIndexOp(builder, loc, end); + return builder.createOrFold(loc, endValue, one); + }); + + for (OpOperand &opOperand : linalgOp->getOpOperands()) { + AffineMap indexingMap = linalgOp.getMatchingIndexingMap(&opOperand); + auto startIndices = affine::makeComposedFoldedMultiResultAffineApply( + builder, loc, indexingMap, starts); + auto endIndices = affine::makeComposedFoldedMultiResultAffineApply( + builder, loc, indexingMap, ends); + + for (auto dim : llvm::seq(linalgOp.getRank(&opOperand))) { + auto startIndex = + getValueOrCreateConstantIndexOp(builder, loc, startIndices[dim]); + auto endIndex = + getValueOrCreateConstantIndexOp(builder, loc, endIndices[dim]); + + // Generate: + // minIndex = min(startIndex, endIndex) + // assert(minIndex >= 0) + // To ensure we do not generate a negative index. 
We take the minimum of + // the start and end indices in order to handle reverse loops such as + // `affine_map<(i) -> (3 - i)>` + auto min = + builder.createOrFold(loc, startIndex, endIndex); + auto cmpOp = builder.createOrFold( + loc, index::IndexCmpPredicate::SGE, min, zero); + auto msg = RuntimeVerifiableOpInterface::generateErrorMessage( + linalgOp, "unexpected negative result on dimension #" + + std::to_string(dim) + " of input/output operand #" + + std::to_string(opOperand.getOperandNumber())); + builder.createOrFold(loc, cmpOp, msg); + + // Generate: + // inferredDimSize = max(startIndex, endIndex) + 1 + // actualDimSize = dim(operand) + // assert(inferredDimSize <= actualDimSize) + // To ensure that we do not index past the bounds of the operands. + auto max = + builder.createOrFold(loc, startIndex, endIndex); + + auto inferredDimSize = + builder.createOrFold(loc, max, one); + + auto actualDimSize = + createOrFoldDimOp(builder, loc, opOperand.get(), dim); + + // Similar to the verifier, when the affine expression in the indexing + // map is complicated, we just check that the inferred dimension sizes + // are in the boundary of the operands' size. Being more precise than + // that is difficult. + auto predicate = isa(indexingMap.getResult(dim)) + ? 
index::IndexCmpPredicate::EQ + : index::IndexCmpPredicate::SLE; + + cmpOp = builder.createOrFold( + loc, predicate, inferredDimSize, actualDimSize); + msg = RuntimeVerifiableOpInterface::generateErrorMessage( + linalgOp, "dimension #" + std::to_string(dim) + + " of input/output operand #" + + std::to_string(opOperand.getOperandNumber()) + + " is incompatible with inferred dimension size"); + builder.createOrFold(loc, cmpOp, msg); + } + } + } +}; + +template +void attachInterface(MLIRContext *ctx) { + (OpTs::template attachInterface>(*ctx), ...); +} +} // namespace +} // namespace linalg +} // namespace mlir + +void mlir::linalg::registerRuntimeVerifiableOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *) { + attachInterface< +#define GET_OP_LIST +#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" + >(ctx); + + // Load additional dialects of which ops may get created. + ctx->loadDialect(); + }); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index e836f0dc63b4f9..ef9a30be9a0153 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1499,11 +1499,11 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, // If the input vector sizes are not provided, then the vector sizes are // determined by the result tensor shape. In case the vector sizes aren't // provided, we update the inBounds attribute instead of masking. - bool useInBoundsInsteadOfMasking = true; + bool useInBoundsInsteadOfMasking = false; if (inputVectorSizes.empty()) { ArrayRef resultTensorShape = packOp.getDestType().getShape(); inputVectorSizes = resultTensorShape.take_front(packOp.getSourceRank()); - useInBoundsInsteadOfMasking = false; + useInBoundsInsteadOfMasking = true; } // Create masked TransferReadOp. 
@@ -1612,7 +1612,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, // to shape of source, then a mask is necessary. Value readResult = vector::createReadOrMaskedRead( rewriter, loc, unpackOp.getSource(), - ArrayRef(readMaskShape.begin(), readMaskShape.end()), padValue); + ArrayRef(readMaskShape.begin(), readMaskShape.end()), padValue, + /*useInBoundsInsteadOfMasking=*/false); PackingMetadata packMetadata; SmallVector lastDimToInsertPosPerm = @@ -1669,7 +1670,8 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp, (void)status; // prevent unused variable warning on non-assert builds assert(succeeded(status) && "failed to reify result shapes"); auto maskedRead = vector::createReadOrMaskedRead( - rewriter, loc, padOp.getSource(), inputVectorSizes, padValue); + rewriter, loc, padOp.getSource(), inputVectorSizes, padValue, + /*useInBoundsInsteadOfMasking=*/false); Operation *write = createWriteOrMaskedWrite( rewriter, loc, maskedRead, reifiedReturnShapes[0], inputVectorSizes); newResults.push_back(write->getResult(0)); diff --git a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp index 05b813a3b1e908..450bfa0cec0c7f 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp @@ -20,25 +20,6 @@ using namespace mlir; -/// Generate an error message string for the given op and the specified error. -static std::string generateErrorMessage(Operation *op, const std::string &msg) { - std::string buffer; - llvm::raw_string_ostream stream(buffer); - OpPrintingFlags flags; - // We may generate a lot of error messages and so we need to ensure the - // printing is fast. 
- flags.elideLargeElementsAttrs(); - flags.printGenericOpForm(); - flags.skipRegions(); - flags.useLocalScope(); - stream << "ERROR: Runtime op verification failed\n"; - op->print(stream, flags); - stream << "\n^ " << msg; - stream << "\nLocation: "; - op->getLoc().print(stream); - return stream.str(); -} - namespace mlir { namespace memref { namespace { @@ -62,8 +43,10 @@ struct CastOpInterface builder.create(loc, resultType.getRank()); Value isSameRank = builder.create( loc, arith::CmpIPredicate::eq, srcRank, resultRank); - builder.create(loc, isSameRank, - generateErrorMessage(op, "rank mismatch")); + builder.create( + loc, isSameRank, + RuntimeVerifiableOpInterface::generateErrorMessage(op, + "rank mismatch")); } // Get source offset and strides. We do not have an op to get offsets and @@ -101,8 +84,8 @@ struct CastOpInterface loc, arith::CmpIPredicate::eq, srcDimSz, resultDimSz); builder.create( loc, isSameSz, - generateErrorMessage(op, "size mismatch of dim " + - std::to_string(it.index()))); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "size mismatch of dim " + std::to_string(it.index()))); } // Get result offset and strides. @@ -119,8 +102,10 @@ struct CastOpInterface builder.create(loc, resultOffset); Value isSameOffset = builder.create( loc, arith::CmpIPredicate::eq, srcOffset, resultOffsetVal); - builder.create(loc, isSameOffset, - generateErrorMessage(op, "offset mismatch")); + builder.create( + loc, isSameOffset, + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "offset mismatch")); } // Check strides. 
@@ -137,8 +122,8 @@ struct CastOpInterface loc, arith::CmpIPredicate::eq, srcStride, resultStrideVal); builder.create( loc, isSameStride, - generateErrorMessage(op, "stride mismatch of dim " + - std::to_string(it.index()))); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "stride mismatch of dim " + std::to_string(it.index()))); } } }; @@ -178,7 +163,9 @@ struct LoadStoreOpInterface : andOp; } builder.create( - loc, assertCond, generateErrorMessage(op, "out-of-bounds access")); + loc, assertCond, + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "out-of-bounds access")); } }; @@ -248,7 +235,7 @@ struct ReinterpretCastOpInterface builder.create( loc, assertCond, - generateErrorMessage( + RuntimeVerifiableOpInterface::generateErrorMessage( op, "result of reinterpret_cast is out-of-bounds of the base memref")); } @@ -293,8 +280,8 @@ struct SubViewOpInterface builder.create( loc, assertCond, - generateErrorMessage(op, - "subview is out-of-bounds of the base memref")); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "subview is out-of-bounds of the base memref")); } }; @@ -334,8 +321,9 @@ struct ExpandShapeOpInterface builder.create(loc, 0)); builder.create( loc, isModZero, - generateErrorMessage(op, "static result dims in reassoc group do not " - "divide src dim evenly")); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "static result dims in reassoc group do not " + "divide src dim evenly")); } } }; diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 3ff41ab22fbc42..5029ed4aa0387a 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -1609,6 +1609,9 @@ OpFoldResult ReshapeOp::fold(FoldAdaptor adaptor) { cst.has_value() && cst.value() == static_cast(id); continue; } + + dynamicNoop = false; + break; } if (dynamicNoop) diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp 
b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp index 2f21c50c63473b..ac576ed0b4f097 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp @@ -437,8 +437,10 @@ struct OneDimMultiReductionToTwoDim auto loc = multiReductionOp.getLoc(); auto srcVectorType = multiReductionOp.getSourceVectorType(); auto srcShape = srcVectorType.getShape(); - auto castedType = VectorType::get(ArrayRef{1, srcShape.back()}, - srcVectorType.getElementType()); + auto castedType = VectorType::get( + ArrayRef{1, srcShape.back()}, srcVectorType.getElementType(), + ArrayRef{false, srcVectorType.getScalableDims().back()}); + auto accType = VectorType::get(ArrayRef{1}, srcVectorType.getElementType()); assert(!llvm::isa(multiReductionOp.getDestType()) && @@ -455,10 +457,11 @@ struct OneDimMultiReductionToTwoDim loc, accType, multiReductionOp.getAcc()); Value castMask; if (maskableOp.isMasked()) { - auto maskType = llvm::cast(mask.getType()); - auto castMaskType = - VectorType::get(ArrayRef{1, maskType.getShape().back()}, - maskType.getElementType()); + auto maskType = llvm::cast(mask.getType()); + auto castMaskType = VectorType::get( + ArrayRef{1, maskType.getShape().back()}, + maskType.getElementType(), + ArrayRef{false, maskType.getScalableDims().back()}); castMask = rewriter.create(loc, castMaskType, mask); } diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp index fcaf1ec944b479..6727f3f461722b 100644 --- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp +++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp @@ -345,7 +345,7 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc, int64_t readRank = readShape.size(); auto zero = builder.create(loc, 0); SmallVector inBoundsVal(readRank, true); - if (!useInBoundsInsteadOfMasking) { + if (useInBoundsInsteadOfMasking) { // Update the inBounds attribute. 
for (unsigned i = 0; i < readRank; i++) inBoundsVal[i] = (sourceShape[i] == readShape[i]) && @@ -359,7 +359,7 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc, /*padding=*/padValue, /*inBounds=*/inBoundsVal); - if (llvm::equal(readShape, sourceShape) || !useInBoundsInsteadOfMasking) + if (llvm::equal(readShape, sourceShape) || useInBoundsInsteadOfMasking) return transferReadOp; SmallVector mixedSourceDims = tensor::getMixedSizes(builder, loc, source); diff --git a/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp b/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp index 9205d8d8c34a29..561e8d33868748 100644 --- a/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp +++ b/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp @@ -11,6 +11,27 @@ namespace mlir { class Location; class OpBuilder; + +/// Generate an error message string for the given op and the specified error. +std::string +RuntimeVerifiableOpInterface::generateErrorMessage(Operation *op, + const std::string &msg) { + std::string buffer; + llvm::raw_string_ostream stream(buffer); + OpPrintingFlags flags; + // We may generate a lot of error messages and so we need to ensure the + // printing is fast. + flags.elideLargeElementsAttrs(); + flags.printGenericOpForm(); + flags.skipRegions(); + flags.useLocalScope(); + stream << "ERROR: Runtime op verification failed\n"; + op->print(stream, flags); + stream << "\n^ " << msg; + stream << "\nLocation: "; + op->getLoc().print(stream); + return stream.str(); +} } // namespace mlir /// Include the definitions of the interface. 
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp index 339c5f3825165d..64dea35614c070 100644 --- a/mlir/lib/Tools/lsp-server-support/Transport.cpp +++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp @@ -51,12 +51,12 @@ class Reply { Reply::Reply(const llvm::json::Value &id, llvm::StringRef method, JSONTransport &transport, std::mutex &transportOutputMutex) - : method(method), id(id), transport(&transport), + : id(id), transport(&transport), transportOutputMutex(transportOutputMutex) {} Reply::Reply(Reply &&other) - : method(other.method), replied(other.replied.load()), - id(std::move(other.id)), transport(other.transport), + : replied(other.replied.load()), id(std::move(other.id)), + transport(other.transport), transportOutputMutex(other.transportOutputMutex) { other.transport = nullptr; } diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 24c7bdd9e1050e..4bfed475d44f60 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -1062,6 +1062,28 @@ module attributes { transform.target_tag = "start_here" } { return %result : tensor<10x18x15xf64> } + func.func @convolution_depthwise(%input: tensor<1x10x196x48xf32>, %filter: tensor<1x4x48xf32>) -> tensor<1x10x191x48xf32> { + %cst = arith.constant 0.0 : f32 + %empty = tensor.empty() : tensor<1x10x191x48xf32> + %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<1x10x191x48xf32>) -> tensor<1x10x191x48xf32> + // expected-remark @below {{convolution}} + // expected-remark @below {{batch dims 0}} + // expected-remark @below {{output image dims 1 : i64, 2 : i64}} + // expected-remark @below {{output channel dims}} + // expected-remark @below {{filter loop dims 4 : i64, 5 : i64}} + // expected-remark @below {{input channel dims}} + // expected-remark @below {{depth dims 3}} + // expected-remark @below {{strides 1 : i64, 
1 : i64}} + // expected-remark @below {{dilations 1 : i64, 1 : i64}} + %result = linalg.depthwise_conv_2d_nhwc_hwc { + dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins(%input, %filter : tensor<1x10x196x48xf32>, tensor<1x4x48xf32>) + outs(%fill : tensor<1x10x191x48xf32>) -> tensor<1x10x191x48xf32> + + return %result : tensor<1x10x191x48xf32> + } + func.func @convolution_multi_channel(%input: tensor<2x34x68x16xf32>, %filter: tensor<8x2x3x5x16x16xf32>) -> tensor<8x32x32x16xf32> { %cst = arith.constant 0.0 : f32 %empty = tensor.empty() : tensor<8x32x32x16xf32> diff --git a/mlir/test/Dialect/Linalg/runtime-verification.mlir b/mlir/test/Dialect/Linalg/runtime-verification.mlir new file mode 100644 index 00000000000000..a4f29d8457e589 --- /dev/null +++ b/mlir/test/Dialect/Linalg/runtime-verification.mlir @@ -0,0 +1,43 @@ +// RUN: mlir-opt %s -generate-runtime-verification | FileCheck %s + +// Most of the tests for linalg runtime-verification are implemented as integration tests. 
+ +#identity = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: @static_dims +func.func @static_dims(%arg0: tensor<5xf32>, %arg1: tensor<5xf32>) -> (tensor<5xf32>) { + // CHECK: %[[TRUE:.*]] = index.bool.constant true + // CHECK: cf.assert %[[TRUE]] + %result = tensor.empty() : tensor<5xf32> + %0 = linalg.generic { + indexing_maps = [#identity, #identity, #identity], + iterator_types = ["parallel"] + } ins(%arg0, %arg1 : tensor<5xf32>, tensor<5xf32>) + outs(%result : tensor<5xf32>) { + ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) : + %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32 + linalg.yield %tmp1 : f32 + } -> tensor<5xf32> + return %0 : tensor<5xf32> +} + +// ----- + +#map = affine_map<() -> ()> + +// CHECK-LABEL: @scalars +func.func @scalars(%arg0: tensor, %arg1: tensor) -> (tensor) { + // No runtime checks are required if the operands are all scalars + // CHECK-NOT: cf.assert + %result = tensor.empty() : tensor + %0 = linalg.generic { + indexing_maps = [#map, #map, #map], + iterator_types = [] + } ins(%arg0, %arg1 : tensor, tensor) + outs(%result : tensor) { + ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) : + %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32 + linalg.yield %tmp1 : f32 + } -> tensor + return %0 : tensor +} diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 751c57eacd7ae5..9a4dd2f3b5cc11 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -2431,6 +2431,15 @@ func.func @reshape_nofold_2d(%arg0 : tensor) -> tensor { return %reshape : tensor } +// ----- + +// CHECK-LABEL: @reshape_nofold_2d_ins +func.func @reshape_nofold_2d_ins(%arg0 : tensor, %arg1: index, %arg2: index) -> tensor { + %ds = tensor.from_elements %arg1, %arg2 : tensor<2xindex> + // CHECK: tensor.reshape + %reshape = tensor.reshape %arg0(%ds) : (tensor, tensor<2xindex>) -> tensor + return %reshape : tensor +} // ----- diff --git 
a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir index 22808aa7d6acc3..f70d23a1932297 100644 --- a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir @@ -281,6 +281,23 @@ func.func private @scalable_dims(%A : vector<8x[4]x2xf32>, %B: vector<8x[4]xf32> // CHECK: %[[VAL_163:.*]] = vector.shape_cast %[[VAL_162]] : vector<[32]xf32> to vector<8x[4]xf32> // CHECK: return %[[VAL_163]] : vector<8x[4]xf32> +// Check that OneDimMultiReductionToTwoDim handles scalable dim +func.func @scalable_dim_1d(%A: vector<[4]xf32>, %B: f32, %C: vector<[4]xi1>) -> f32 { + %0 = vector.mask %C { vector.multi_reduction , %A, %B [0] : vector<[4]xf32> to f32 } : vector<[4]xi1> -> f32 + return %0 : f32 +} + +// CHECK-LABEL: func.func @scalable_dim_1d( +// CHECK-SAME: %[[ARG_0:.*]]: vector<[4]xf32>, +// CHECK-SAME: %[[ARG_1:.*]]: f32, +// CHECK-SAME: %[[ARG_2:.*]]: vector<[4]xi1>) -> f32 { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant dense<0.000000e+00> : vector<1xf32> +// CHECK: %[[VAL_2:.*]] = vector.mask %[[ARG_2]] { vector.reduction , %[[ARG_0]], %[[ARG_1]] : vector<[4]xf32> into f32 } : vector<[4]xi1> -> f32 +// CHECK: %[[VAL_3:.*]] = vector.insertelement %[[VAL_2]], %[[VAL_1]][%[[VAL_0]] : index] : vector<1xf32> +// CHECK: %[[VAL_4:.*]] = vector.extract %[[VAL_3]][0] : f32 from vector<1xf32> +// CHECK: return %[[VAL_4]] : f32 + module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) { %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func"> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir new file mode 100644 index 
00000000000000..b05ef9422e5967 --- /dev/null +++ b/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir @@ -0,0 +1,298 @@ +// RUN: mlir-opt %s -generate-runtime-verification \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -convert-linalg-to-loops \ +// RUN: -expand-strided-metadata \ +// RUN: -lower-affine \ +// RUN: -convert-scf-to-cf \ +// RUN: -test-cf-assert \ +// RUN: -convert-index-to-llvm \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils \ +// RUN: -shared-libs=%mlir_c_runner_utils 2>&1 | \ +// RUN: FileCheck %s + +func.func @main() { + %c5x = arith.constant dense<0.0> : tensor<5xf32> + %c4x = arith.constant dense<0.0> : tensor<4xf32> + %d5x = tensor.cast %c5x : tensor<5xf32> to tensor + %d4x = tensor.cast %c4x : tensor<4xf32> to tensor + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @simple_add(%d5x, %d5x) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + func.call @simple_add(%d5x, %d4x) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + func.call @simple_add(%d4x, %d5x) : (tensor, tensor) -> (tensor) + + %c1x1 = arith.constant dense<0.0> : tensor<1x1xf32> + %c1x4 = arith.constant dense<0.0> : tensor<1x4xf32> + %c4x4 = arith.constant dense<0.0> : tensor<4x4xf32> + %c4x5 = arith.constant dense<0.0> : tensor<4x5xf32> + %c5x4 = arith.constant dense<0.0> : tensor<5x4xf32> + %d1x1 = tensor.cast %c1x1 : tensor<1x1xf32> to tensor + %d1x4 = tensor.cast %c1x4 : tensor<1x4xf32> to tensor + %d4x4 = tensor.cast %c4x4 : tensor<4x4xf32> 
to tensor + %d4x5 = tensor.cast %c4x5 : tensor<4x5xf32> to tensor + %d5x4 = tensor.cast %c5x4 : tensor<5x4xf32> to tensor + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d1x1, %d1x1) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d1x1, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d4x4, %d1x4) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + func.call @broadcast_add(%d1x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size + func.call @broadcast_add(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @matmul_generic(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + func.call @matmul_generic(%d4x5, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @matmul_named(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.matmul + // CHECK: ^ dimension #0 of input/output operand #1 is 
incompatible with inferred dimension size + func.call @matmul_named(%d4x5, %d4x5) : (tensor, tensor) -> (tensor) + + %c64x57 = arith.constant dense<0.0> : tensor<16x29xf32> + %c3x4 = arith.constant dense<0.0> : tensor<3x4xf32> + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @conv(%c64x57, %c3x4) : (tensor<16x29xf32>, tensor<3x4xf32>) -> (tensor<5x7xf32>) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @reverse_from_3(%d4x) : (tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: unexpected negative result on dimension #0 of input/output operand #0 + func.call @reverse_from_3(%d5x) : (tensor) -> (tensor) + + return +} + + +#identity1D = affine_map<(d0) -> (d0)> + +func.func @simple_add(%arg0: tensor, %arg1: tensor) -> (tensor) { + %c0 = arith.constant 0 : index + %dim = tensor.dim %arg0, %c0 : tensor + %result = tensor.empty(%dim) : tensor + %0 = linalg.generic { + indexing_maps = [#identity1D, #identity1D, #identity1D], + iterator_types = ["parallel"] + } ins(%arg0, %arg1 : tensor, tensor) + outs(%result : tensor) { + ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) : + %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32 + linalg.yield %tmp1 : f32 + } -> tensor + return %0 : tensor +} + +#broadcastD0 = affine_map<(d0, d1) -> (0, d1)> +#broadcastD1 = affine_map<(d0, d1) -> (d0, 0)> +#identity2D = affine_map<(d0, d1) -> (d0, d1)> + +func.func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor { + // Calculate maximum dimension 0 + %c0 = arith.constant 0 : index + %dim = tensor.dim %arg0, %c0 : tensor + %dim_0 = tensor.dim %arg1, %c0 : tensor + %0 = arith.maxui %dim, %dim_0 : index + + // Calculate maximum dimension 1 + %c1 = arith.constant 1 : index + %dim_1 = tensor.dim %arg0, %c1 : tensor + %dim_2 = tensor.dim %arg1, %c1 : tensor + %1 = arith.maxui %dim_1, %dim_2 : index + + // Broadcast dimension 0 of %arg0 + %dim_3 = tensor.dim %arg0, %c0 : tensor + %2 = arith.cmpi eq, 
%dim_3, %c1 : index + %3 = scf.if %2 -> (tensor) { + %dim_7 = tensor.dim %arg0, %c1 : tensor + %12 = tensor.empty(%0, %dim_7) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD0, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%arg0 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %arg0 : tensor + } + + // Broadcast dimension 1 of %arg0 + %dim_4 = tensor.dim %3, %c1 : tensor + %4 = arith.cmpi eq, %dim_4, %c1 : index + %5 = scf.if %4 -> (tensor) { + %dim_7 = tensor.dim %3, %c0 : tensor + %12 = tensor.empty(%dim_7, %1) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD1, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%3 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %3 : tensor + } + + // Broadcast dimension 0 of %arg1 + %dim_5 = tensor.dim %arg1, %c0 : tensor + %6 = arith.cmpi eq, %dim_5, %c1 : index + %7 = scf.if %6 -> (tensor) { + %dim_7 = tensor.dim %arg1, %c1 : tensor + %12 = tensor.empty(%0, %dim_7) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD0, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%arg1 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %arg1 : tensor + } + + // Broadcast dimension 1 of %arg1 + %dim_6 = tensor.dim %7, %c1 : tensor + %8 = arith.cmpi eq, %dim_6, %c1 : index + %9 = scf.if %8 -> (tensor) { + %dim_7 = tensor.dim %7, %c0 : tensor + %12 = tensor.empty(%dim_7, %1) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD1, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%7 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %7 : 
tensor + } + + // Perform element-wise computation + %10 = tensor.empty(%0, %1) : tensor + %11 = linalg.generic { + indexing_maps = [#identity2D, #identity2D, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%5, %9 : tensor, tensor) outs(%10 : tensor) { + ^bb0(%in: f32, %in_7: f32, %out: f32): + %12 = arith.addf %in, %in_7 : f32 + linalg.yield %12 : f32 + } -> tensor + return %11 : tensor +} + +#matmul_accesses = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> +] +#matmul_trait = { + iterator_types = ["parallel", "parallel", "reduction"], + indexing_maps = #matmul_accesses +} + +func.func @matmul_generic(%arg0: tensor, %arg1: tensor) -> tensor { + %cf0 = arith.constant 0.0 : f32 + %ci0 = arith.constant 0 : index + %ci1 = arith.constant 1 : index + %d0 = tensor.dim %arg0, %ci0 : tensor + %d1 = tensor.dim %arg1, %ci1 : tensor + %splat = tensor.splat %cf0[%d0, %d1] : tensor + %0 = linalg.generic #matmul_trait ins(%arg0, %arg1 : tensor, tensor) outs(%splat : tensor) { + ^bb0(%in: f32, %in_0: f32, %out: f32): + %1 = arith.mulf %in, %in_0 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor + return %0 : tensor +} + +func.func @matmul_named(%arg0: tensor, %arg1: tensor) -> tensor { + %cf0 = arith.constant 0.0 : f32 + %ci0 = arith.constant 0 : index + %ci1 = arith.constant 1 : index + %d0 = tensor.dim %arg0, %ci0 : tensor + %d1 = tensor.dim %arg1, %ci1 : tensor + %splat = tensor.splat %cf0[%d0, %d1] : tensor + %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%splat : tensor) -> tensor + return %0 : tensor +} + +#conv_trait = { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 * 3 + d2, d1 * 4 + d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction", "reduction"] +} + +func.func @conv(%arg0: tensor<16x29xf32>, %arg1: tensor<3x4xf32>) -> (tensor<5x7xf32>) { + %c0 = 
arith.constant 0.0 : f32 + %splat = tensor.splat %c0 : tensor<5x7xf32> + %result = linalg.generic #conv_trait ins(%arg0, %arg1 : tensor<16x29xf32>, tensor<3x4xf32>) outs(%splat : tensor<5x7xf32>) { + ^bb0(%in: f32, %in_64: f32, %out: f32): + %5 = arith.mulf %in, %in_64 : f32 + %6 = arith.addf %out, %5 : f32 + linalg.yield %6 : f32 + } -> tensor<5x7xf32> + return %result : tensor<5x7xf32> +} + +#reverse_trait = { + indexing_maps = [ + affine_map<(i) -> (3 - i)>, + affine_map<(i) -> (i)> + ], + iterator_types = ["parallel"] +} + +func.func @reverse_from_3(%arg0: tensor) -> (tensor) { + %cf0 = arith.constant 0.0 : f32 + %ci0 = arith.constant 0 : index + %d0 = tensor.dim %arg0, %ci0 : tensor + %splat = tensor.splat %cf0[%d0] : tensor + %result = linalg.generic #reverse_trait ins(%arg0: tensor) outs(%splat: tensor) { + ^bb0(%a: f32, %b: f32): + linalg.yield %a : f32 + } -> tensor + return %result : tensor +} diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 6d8aa290e82f25..6fad249a0b2fba 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -20,7 +20,6 @@ add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Target) -add_subdirectory(Tools) add_subdirectory(Transforms) if(MLIR_ENABLE_EXECUTION_ENGINE) diff --git a/mlir/unittests/Tools/CMakeLists.txt b/mlir/unittests/Tools/CMakeLists.txt deleted file mode 100644 index a97588d9286685..00000000000000 --- a/mlir/unittests/Tools/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(lsp-server-support) diff --git a/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt b/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt deleted file mode 100644 index 3aa8b9c4bc7735..00000000000000 --- a/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_mlir_unittest(MLIRLspServerSupportTests - Transport.cpp -) -target_link_libraries(MLIRLspServerSupportTests - PRIVATE - 
MLIRLspServerSupportLib) diff --git a/mlir/unittests/Tools/lsp-server-support/Transport.cpp b/mlir/unittests/Tools/lsp-server-support/Transport.cpp deleted file mode 100644 index 48eae32a0fc3a4..00000000000000 --- a/mlir/unittests/Tools/lsp-server-support/Transport.cpp +++ /dev/null @@ -1,121 +0,0 @@ -//===- Transport.cpp - LSP JSON transport unit tests ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Tools/lsp-server-support/Transport.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" -#include "llvm/Support/FileSystem.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using namespace mlir; -using namespace mlir::lsp; -using namespace testing; - -namespace { - -TEST(TransportTest, SendReply) { - std::string out; - llvm::raw_string_ostream os(out); - JSONTransport transport(nullptr, os); - MessageHandler handler(transport); - - transport.reply(1989, nullptr); - EXPECT_THAT(out, HasSubstr("\"id\":1989")); - EXPECT_THAT(out, HasSubstr("\"result\":null")); -} - -class TransportInputTest : public Test { - std::optional inputTempFile; - std::FILE *in = nullptr; - std::string output = ""; - llvm::raw_string_ostream os; - std::optional transport = std::nullopt; - std::optional messageHandler = std::nullopt; - -protected: - TransportInputTest() : os(output) {} - - void SetUp() override { - auto tempOr = llvm::sys::fs::TempFile::create("lsp-unittest-%%%%%%.json"); - ASSERT_TRUE((bool)tempOr); - llvm::sys::fs::TempFile t = std::move(*tempOr); - inputTempFile = std::move(t); - - in = std::fopen(inputTempFile->TmpName.c_str(), "r"); - transport.emplace(in, os, JSONStreamStyle::Delimited); - messageHandler.emplace(*transport); - 
} - - void TearDown() override { - EXPECT_FALSE(inputTempFile->discard()); - EXPECT_EQ(std::fclose(in), 0); - } - - void writeInput(StringRef buffer) { - std::error_code ec; - llvm::raw_fd_ostream os(inputTempFile->TmpName, ec); - ASSERT_FALSE(ec); - os << buffer; - os.close(); - } - - StringRef getOutput() const { return output; } - MessageHandler &getMessageHandler() { return *messageHandler; } - - void runTransport() { - bool gotEOF = false; - llvm::Error err = llvm::handleErrors( - transport->run(*messageHandler), [&](const llvm::ECError &ecErr) { - gotEOF = ecErr.convertToErrorCode() == std::errc::io_error; - }); - llvm::consumeError(std::move(err)); - EXPECT_TRUE(gotEOF); - } -}; - -TEST_F(TransportInputTest, RequestWithInvalidParams) { - struct Handler { - void onMethod(const TextDocumentItem ¶ms, - mlir::lsp::Callback callback) {} - } handler; - getMessageHandler().method("invalid-params-request", &handler, - &Handler::onMethod); - - writeInput("{\"jsonrpc\":\"2.0\",\"id\":92," - "\"method\":\"invalid-params-request\",\"params\":{}}\n"); - runTransport(); - EXPECT_THAT(getOutput(), HasSubstr("error")); - EXPECT_THAT(getOutput(), HasSubstr("missing value at (root).uri")); -} - -TEST_F(TransportInputTest, NotificationWithInvalidParams) { - // JSON parsing errors are only reported via error logging. As a result, this - // test can't make any expectations -- but it prints the output anyway, by way - // of demonstration. 
- Logger::setLogLevel(Logger::Level::Error); - - struct Handler { - void onNotification(const TextDocumentItem ¶ms) {} - } handler; - getMessageHandler().notification("invalid-params-notification", &handler, - &Handler::onNotification); - - writeInput("{\"jsonrpc\":\"2.0\",\"method\":\"invalid-params-notification\"," - "\"params\":{}}\n"); - runTransport(); -} - -TEST_F(TransportInputTest, MethodNotFound) { - writeInput("{\"jsonrpc\":\"2.0\",\"id\":29,\"method\":\"ack\"}\n"); - runTransport(); - EXPECT_THAT(getOutput(), HasSubstr("\"id\":29")); - EXPECT_THAT(getOutput(), HasSubstr("\"error\"")); - EXPECT_THAT(getOutput(), HasSubstr("\"message\":\"method not found: ack\"")); -} -} // namespace diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 3eb0f67bbd88a5..6a6f8fc1341002 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -11104,6 +11104,7 @@ cc_library( ":FuncTransforms", ":GPUDialect", ":IR", + ":IndexDialect", ":LinalgDialect", ":LinalgPassIncGen", ":LinalgStructuredOpsIncGen", @@ -11115,6 +11116,7 @@ cc_library( ":MeshShardingInterface", ":MeshTransforms", ":Pass", + ":RuntimeVerifiableOpInterface", ":SCFDialect", ":SCFTransforms", ":SCFUtils",