From e24a7bbf4515213f44d410bfc41b3dff27c49c86 Mon Sep 17 00:00:00 2001
From: Brian Gesiak <brian@modocache.io>
Date: Mon, 29 Apr 2024 18:43:08 -0400
Subject: [PATCH 01/65] [mlir-lsp] Support outgoing requests (#90078)

Add support for outgoing requests to `lsp::MessageHandler`. Much like
`MessageHandler::outgoingNotification`, this allows for the message
handler to send outgoing messages via its JSON transport, but in this
case, those messages are requests, not notifications.

Requests receive responses (also referred to as "replies" in
`MLIRLspServerSupportLib`). These were previously unsupported, and
`lsp::MessageHandler` would log an error each time it processed a JSON
message that appeared to be a response (something with an "id" field,
but no "method" field). However, the `outgoingRequest` method now
handles response callbacks: an outgoing request with a given ID is set
up such that a callback function is invoked when a response with that ID
is received.
---
 .../mlir/Tools/lsp-server-support/Transport.h | 41 +++++++++++++++++++
 .../Tools/lsp-server-support/Transport.cpp    | 38 ++++++++++-------
 .../Tools/lsp-server-support/Transport.cpp    | 39 ++++++++++++++++++
 3 files changed, 103 insertions(+), 15 deletions(-)
diff --git a/mlir/include/mlir/Tools/lsp-server-support/Transport.h b/mlir/include/mlir/Tools/lsp-server-support/Transport.h
index 44c71058cf717c..047d174234df8d 100644
--- a/mlir/include/mlir/Tools/lsp-server-support/Transport.h
+++ b/mlir/include/mlir/Tools/lsp-server-support/Transport.h
@@ -15,6 +15,7 @@
 #ifndef MLIR_TOOLS_LSPSERVERSUPPORT_TRANSPORT_H
 #define MLIR_TOOLS_LSPSERVERSUPPORT_TRANSPORT_H
 
+#include "mlir/Support/DebugStringHelper.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
@@ -100,6 +101,18 @@ using Callback = llvm::unique_function<void(llvm::Expected<T>)>;
 template <typename T>
 using OutgoingNotification = llvm::unique_function<void(const T &)>;
 
+/// An OutgoingRequest<T> is a function used for outgoing requests to send to
+/// the client.
+template <typename T>
+using OutgoingRequest =
+    llvm::unique_function<void(const T &, llvm::json::Value id)>;
+
+/// An `OutgoingRequestCallback` is invoked when an outgoing request to the
+/// client receives a response in turn. It is passed the original request's ID,
+/// as well as the result JSON.
+using OutgoingRequestCallback =
+    std::function<void(llvm::json::Value, llvm::Expected<llvm::json::Value>)>;
+
 /// A handler used to process the incoming transport messages.
 class MessageHandler {
 public:
@@ -170,6 +183,26 @@ class MessageHandler {
     };
   }
 
+  /// Create an OutgoingRequest function that, when called, sends a request with
+  /// the given method via the transport. Should the outgoing request be
+  /// met with a response, the response callback is invoked to handle that
+  /// response.
+  template <typename T>
+  OutgoingRequest<T> outgoingRequest(llvm::StringLiteral method,
+                                     OutgoingRequestCallback callback) {
+    return [&, method, callback](const T &params, llvm::json::Value id) {
+      {
+        std::lock_guard<std::mutex> lock(responseHandlersMutex);
+        responseHandlers.insert(
+            {debugString(id), std::make_pair(method.str(), callback)});
+      }
+
+      std::lock_guard<std::mutex> transportLock(transportOutputMutex);
+      Logger::info("--> {0}({1})", method, id);
+      transport.call(method, llvm::json::Value(params), id);
+    };
+  }
+
 private:
   template <typename HandlerT>
   using HandlerMap = llvm::StringMap<llvm::unique_function<HandlerT>>;
@@ -178,6 +211,14 @@ class MessageHandler {
   HandlerMap<void(llvm::json::Value, Callback<llvm::json::Value>)>
       methodHandlers;
 
+  /// A pair of (1) the original request's method name, and (2) the callback
+  /// function to be invoked for responses.
+  using ResponseHandlerTy = std::pair<std::string, OutgoingRequestCallback>;
+  /// A mapping from request/response ID to response handler.
+  llvm::StringMap<ResponseHandlerTy> responseHandlers;
+  /// Mutex to guard insertion into the response handler map.
+  std::mutex responseHandlersMutex;
+
   JSONTransport &transport;
 
   /// Mutex to guard sending output messages to the transport.
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp
index 339c5f3825165d..1e90ab32281f54 100644
--- a/mlir/lib/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp
@@ -117,21 +117,29 @@ bool MessageHandler::onCall(llvm::StringRef method, llvm::json::Value params,
 
 bool MessageHandler::onReply(llvm::json::Value id,
                              llvm::Expected<llvm::json::Value> result) {
-  // TODO: Add support for reply callbacks when support for outgoing messages is
-  // added. For now, we just log an error on any replies received.
-  Callback<llvm::json::Value> replyHandler =
-      [&id](llvm::Expected<llvm::json::Value> result) {
-        Logger::error(
-            "received a reply with ID {0}, but there was no such call", id);
-        if (!result)
-          llvm::consumeError(result.takeError());
-      };
-
-  // Log and run the reply handler.
-  if (result)
-    replyHandler(std::move(result));
-  else
-    replyHandler(result.takeError());
+  // Find the response handler in the mapping. If it exists, move it out of the
+  // mapping and erase it.
+  ResponseHandlerTy responseHandler;
+  {
+    std::lock_guard<std::mutex> responseHandlersLock(responseHandlersMutex);
+    auto it = responseHandlers.find(debugString(id));
+    if (it != responseHandlers.end()) {
+      responseHandler = std::move(it->second);
+      responseHandlers.erase(it);
+    }
+  }
+
+  // If we found a response handler, invoke it. Otherwise, log an error.
+  if (responseHandler.second) {
+    Logger::info("--> reply:{0}({1})", responseHandler.first, id);
+    responseHandler.second(std::move(id), std::move(result));
+  } else {
+    Logger::error(
+        "received a reply with ID {0}, but there was no such outgoing request",
+        id);
+    if (!result)
+      llvm::consumeError(result.takeError());
+  }
   return true;
 }
 
diff --git a/mlir/unittests/Tools/lsp-server-support/Transport.cpp b/mlir/unittests/Tools/lsp-server-support/Transport.cpp
index a086964cd3660c..fee21840595232 100644
--- a/mlir/unittests/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/unittests/Tools/lsp-server-support/Transport.cpp
@@ -131,4 +131,43 @@ TEST_F(TransportInputTest, OutgoingNotification) {
   notifyFn(CompletionList{});
   EXPECT_THAT(getOutput(), HasSubstr("\"method\":\"outgoing-notification\""));
 }
+
+TEST_F(TransportInputTest, ResponseHandlerNotFound) {
+  // Unhandled responses are only reported via error logging. As a result, this
+  // test can't make any expectations -- but it prints the output anyway, by way
+  // of demonstration.
+  Logger::setLogLevel(Logger::Level::Error);
+  writeInput("{\"jsonrpc\":\"2.0\",\"id\":81,\"result\":null}\n");
+  runTransport();
+}
+
+TEST_F(TransportInputTest, OutgoingRequest) {
+  // Make some outgoing requests.
+  int responseCallbackInvoked = 0;
+  auto callFn = getMessageHandler().outgoingRequest<CompletionList>(
+      "outgoing-request",
+      [&responseCallbackInvoked](llvm::json::Value id,
+                                 llvm::Expected<llvm::json::Value> value) {
+        // Make expectations on the expected response.
+        EXPECT_EQ(id, 83);
+        ASSERT_TRUE((bool)value);
+        EXPECT_EQ(debugString(*value), "{\"foo\":6}");
+        responseCallbackInvoked += 1;
+        llvm::outs() << "here!!!\n";
+      });
+  callFn({}, 82);
+  callFn({}, 83);
+  callFn({}, 84);
+  EXPECT_THAT(getOutput(), HasSubstr("\"method\":\"outgoing-request\""));
+  EXPECT_EQ(responseCallbackInvoked, 0);
+
+  // One of the requests receives a response. The message handler handles this
+  // response by invoking the callback from above. Subsequent responses with the
+  // same ID are ignored.
+  writeInput("{\"jsonrpc\":\"2.0\",\"id\":83,\"result\":{\"foo\":6}}\n"
+             "// -----\n"
+             "{\"jsonrpc\":\"2.0\",\"id\":83,\"result\":{\"bar\":8}}\n");
+  runTransport();
+  EXPECT_EQ(responseCallbackInvoked, 1);
+}
 } // namespace

From 1b70580dd867195b0442e582eccd42abc41ee12d Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Mon, 29 Apr 2024 15:56:41 -0700
Subject: [PATCH 02/65] Skip various tests under ASAN on green dragon (#90531)

using the macOS version as a proxy. I can't reproduce any of these
failures locally, but the tests all use pexpect and probably have bad
timeout behavior under high load.
---
 lldb/packages/Python/lldbsuite/test/decorators.py        | 9 ++++++++-
 lldb/test/API/driver/batch_mode/TestBatchMode.py         | 4 ++++
 lldb/test/API/driver/job_control/TestJobControl.py       | 1 +
 lldb/test/API/driver/quit_speed/TestQuitWithProcess.py   | 2 +-
 .../iohandler/sigint/TestProcessIOHandlerInterrupt.py    | 1 +
 lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py      | 8 ++++++--
 lldb/test/API/terminal/TestSTTYBeforeAndAfter.py         | 1 +
 7 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py
index 8e13aa6a13882f..7fb88cef165356 100644
--- a/lldb/packages/Python/lldbsuite/test/decorators.py
+++ b/lldb/packages/Python/lldbsuite/test/decorators.py
@@ -206,6 +206,7 @@ def _decorateTest(
     remote=None,
     dwarf_version=None,
     setting=None,
+    asan=None,
 ):
     def fn(actual_debug_info=None):
         skip_for_os = _match_decorator_property(
@@ -256,6 +257,7 @@ def fn(actual_debug_info=None):
             )
         )
         skip_for_setting = (setting is None) or (setting in configuration.settings)
+        skip_for_asan = (asan is None) or is_running_under_asan()
 
         # For the test to be skipped, all specified (e.g. not None) parameters must be True.
         # An unspecified parameter means "any", so those are marked skip by default.  And we skip
@@ -273,6 +275,7 @@ def fn(actual_debug_info=None):
             (remote, skip_for_remote, "platform locality (remote/local)"),
             (dwarf_version, skip_for_dwarf_version, "dwarf version"),
             (setting, skip_for_setting, "setting"),
+            (asan, skip_for_asan, "running under asan"),
         ]
         reasons = []
         final_skip_result = True
@@ -331,6 +334,7 @@ def expectedFailureAll(
     remote=None,
     dwarf_version=None,
     setting=None,
+    asan=None,
 ):
     return _decorateTest(
         DecorateMode.Xfail,
@@ -348,6 +352,7 @@ def expectedFailureAll(
         remote=remote,
         dwarf_version=dwarf_version,
         setting=setting,
+        asan=asan,
     )
 
 
@@ -356,7 +361,7 @@ def expectedFailureAll(
 # for example,
 # @skipIf, skip for all platform/compiler/arch,
 # @skipIf(compiler='gcc'), skip for gcc on all platform/architecture
-# @skipIf(bugnumber, ["linux"], "gcc", ['>=', '4.9'], ['i386']), skip for gcc>=4.9 on linux with i386
+# @skipIf(bugnumber, ["linux"], "gcc", ['>=', '4.9'], ['i386']), skip for gcc>=4.9 on linux with i386 (all conditions must be true)
 def skipIf(
     bugnumber=None,
     oslist=None,
@@ -372,6 +377,7 @@ def skipIf(
     remote=None,
     dwarf_version=None,
     setting=None,
+    asan=None,
 ):
     return _decorateTest(
         DecorateMode.Skip,
@@ -389,6 +395,7 @@ def skipIf(
         remote=remote,
         dwarf_version=dwarf_version,
         setting=setting,
+        asan=asan,
     )
 
 
diff --git a/lldb/test/API/driver/batch_mode/TestBatchMode.py b/lldb/test/API/driver/batch_mode/TestBatchMode.py
index 642dd47c6d4586..bc6f2daebbab81 100644
--- a/lldb/test/API/driver/batch_mode/TestBatchMode.py
+++ b/lldb/test/API/driver/batch_mode/TestBatchMode.py
@@ -13,6 +13,7 @@
 class DriverBatchModeTest(PExpectTest):
     source = "main.c"
 
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @skipIf(oslist=["linux"], archs=["arm", "aarch64"])  # Randomly fails on buildbot
     @expectedFlakeyFreeBSD("llvm.org/pr25172 fails rarely on the buildbot")
     def test_batch_mode_run_crash(self):
@@ -50,6 +51,7 @@ def test_batch_mode_run_crash(self):
         self.expect_prompt()
         self.expect("frame variable touch_me_not", substrs=["(char *) touch_me_not"])
 
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @skipIf(oslist=["linux"], archs=["arm", "aarch64"])  # Randomly fails on buildbot
     @expectedFlakeyFreeBSD("llvm.org/pr25172 fails rarely on the buildbot")
     def test_batch_mode_run_exit(self):
@@ -86,6 +88,7 @@ def test_batch_mode_run_exit(self):
 
         child.expect(pexpect.EOF)
 
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @skipIf(oslist=["linux"], archs=["arm", "aarch64"])  # Randomly fails on buildbot
     @expectedFlakeyFreeBSD("llvm.org/pr25172 fails rarely on the buildbot")
     def test_batch_mode_launch_stop_at_entry(self):
@@ -125,6 +128,7 @@ def closeVictim(self):
             self.victim.close()
             self.victim = None
 
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @skipIf(oslist=["linux"], archs=["arm", "aarch64"])  # Randomly fails on buildbot
     @expectedFlakeyFreeBSD("llvm.org/pr25172 fails rarely on the buildbot")
     @expectedFailureNetBSD
diff --git a/lldb/test/API/driver/job_control/TestJobControl.py b/lldb/test/API/driver/job_control/TestJobControl.py
index 1a1739f4cb391d..648acb1d4730bc 100644
--- a/lldb/test/API/driver/job_control/TestJobControl.py
+++ b/lldb/test/API/driver/job_control/TestJobControl.py
@@ -8,6 +8,7 @@
 
 
 class JobControlTest(PExpectTest):
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @skipIf(oslist=["linux"], archs=["arm", "aarch64"])
     def test_job_control(self):
         def post_spawn():
diff --git a/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py b/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py
index 42527c88b99213..c75ac977ea2094 100644
--- a/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py
+++ b/lldb/test/API/driver/quit_speed/TestQuitWithProcess.py
@@ -31,4 +31,4 @@ def test_run_quit(self):
         print("Got launch message")
         child.sendline("quit")
         print("sent quit")
-        child.expect(pexpect.EOF, timeout=15)
+        child.expect(pexpect.EOF, timeout=60)
diff --git a/lldb/test/API/iohandler/sigint/TestProcessIOHandlerInterrupt.py b/lldb/test/API/iohandler/sigint/TestProcessIOHandlerInterrupt.py
index 8e19d56cd0c2f9..75ac0f6c0289a4 100644
--- a/lldb/test/API/iohandler/sigint/TestProcessIOHandlerInterrupt.py
+++ b/lldb/test/API/iohandler/sigint/TestProcessIOHandlerInterrupt.py
@@ -11,6 +11,7 @@
 
 
 class TestCase(PExpectTest):
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @skipIf(compiler="clang", compiler_version=["<", "11.0"])
     @skipIf(oslist=["linux"], archs=["arm", "aarch64"])
     def test(self):
diff --git a/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py b/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py
index 15d9feb543895a..f6bda16560b962 100644
--- a/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py
+++ b/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py
@@ -20,8 +20,6 @@
 class DarwinNSLogOutputTestCase(TestBase):
     NO_DEBUG_INFO_TESTCASE = True
 
-    @skipUnlessDarwin
-    @skipIfRemote  # this test is currently written using lldb commands & assumes running on local system
     def setUp(self):
         # Call super's setUp().
         TestBase.setUp(self)
@@ -119,6 +117,9 @@ def do_test(self, expect_regexes=None, settings_commands=None):
         self.runCmd("process continue")
         self.expect(expect_regexes)
 
+    @skipIf(oslist=["linux"], archs=["arm", "aarch64"])
+    @skipUnlessDarwin
+    @skipIfRemote  # this test is currently written using lldb commands & assumes running on local system
     def test_nslog_output_is_displayed(self):
         """Test that NSLog() output shows up in the command-line debugger."""
         self.do_test(
@@ -131,6 +132,9 @@ def test_nslog_output_is_displayed(self):
         self.assertGreater(len(self.child.match.groups()), 0)
         self.assertEqual("This is a message from NSLog", self.child.match.group(1))
 
+    @skipIf(oslist=["linux"], archs=["arm", "aarch64"])
+    @skipUnlessDarwin
+    @skipIfRemote  # this test is currently written using lldb commands & assumes running on local system
     def test_nslog_output_is_suppressed_with_env_var(self):
         """Test that NSLog() output does not show up with the ignore env var."""
         # This test will only work properly on macOS 10.12+.  Skip it on earlier versions.
diff --git a/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py b/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py
index 21aca5fc85d5f1..313a265319dbac 100644
--- a/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py
+++ b/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py
@@ -19,6 +19,7 @@ def classCleanup(cls):
         cls.RemoveTempFile("child_send2.txt")
         cls.RemoveTempFile("child_read2.txt")
 
+    @skipIf(macos_version=["<", "14.0"], asan=True)
     @add_test_categories(["pexpect"])
     @no_debug_info_test
     def test_stty_dash_a_before_and_afetr_invoking_lldb_command(self):

From c4c4e17c99f8d4f79bb9e1e3819d1d76e5e09ed1 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov@fb.com>
Date: Tue, 30 Apr 2024 01:18:13 +0200
Subject: [PATCH 03/65] [BOLT] Use heuristic for matching split local functions
 (#90424)

Use known order of BOLT split function symbols: fragment symbols
immediately precede the parent fragment symbol.

Depends On: https://github.com/llvm/llvm-project/pull/89648

Test Plan: Added register-fragments-bolt-symbols.s
---
 bolt/lib/Rewrite/RewriteInstance.cpp          | 18 +++++++++++
 .../X86/register-fragments-bolt-symbols.s     | 32 +++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 bolt/test/X86/register-fragments-bolt-symbols.s

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1c5ce5d0a6cd0b..644d87eeca42e6 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -1513,6 +1513,23 @@ void RewriteInstance::registerFragments() {
       StopSymbol = *FSI;
 
     uint64_t ParentAddress{0};
+
+    // BOLT split fragment symbols are emitted just before the main function
+    // symbol.
+    for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol;
+         NextSymbol.moveNext()) {
+      StringRef Name = cantFail(NextSymbol.getName());
+      if (Name == ParentName) {
+        ParentAddress = cantFail(NextSymbol.getValue());
+        goto registerParent;
+      }
+      if (Name.starts_with(ParentName))
+        // With multi-way splitting, there are multiple fragments with different
+        // suffixes. Parent follows the last fragment.
+        continue;
+      break;
+    }
+
     // Iterate over local file symbols and check symbol names to match parent.
     for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) {
       if (cantFail(Symbol.getName()) == ParentName) {
@@ -1521,6 +1538,7 @@ void RewriteInstance::registerFragments() {
       }
     }
 
+registerParent:
     // No local parent is found, use global parent function.
     if (!ParentAddress)
       if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
diff --git a/bolt/test/X86/register-fragments-bolt-symbols.s b/bolt/test/X86/register-fragments-bolt-symbols.s
new file mode 100644
index 00000000000000..fa9b70e0b2d891
--- /dev/null
+++ b/bolt/test/X86/register-fragments-bolt-symbols.s
@@ -0,0 +1,32 @@
+# Test the heuristics for matching BOLT-added split functions.
+
+# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %S/cdsplit-symbol-names.s -o %t.main.o
+# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.chain.o
+# RUN: link_fdata %S/cdsplit-symbol-names.s %t.main.o %t.fdata
+# RUN: sed -i 's|chain|chain/2|g' %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.main.o
+# RUN: llvm-objcopy --localize-symbol=chain %t.main.o
+# RUN: %clang %cflags %t.chain.o %t.main.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-strategy=randomN \
+# RUN:   --reorder-blocks=ext-tsp --enable-bat --bolt-seed=7 --data=%t.fdata
+# RUN: llvm-objdump --syms %t.bolt | FileCheck %s --check-prefix=CHECK-SYMS
+
+# RUN: link_fdata %s %t.bolt %t.preagg PREAGG
+# PREAGG: B X:0 #chain.cold.0# 1 0
+# RUN: perf2bolt %t.bolt -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml -v=1 \
+# RUN:   | FileCheck %s --check-prefix=CHECK-REGISTER
+
+# CHECK-SYMS: l df *ABS*          [[#]] chain.s
+# CHECK-SYMS: l  F .bolt.org.text [[#]] chain
+# CHECK-SYMS: l  F .text.cold     [[#]] chain.cold.0
+# CHECK-SYMS: l  F .text          [[#]] chain
+# CHECK-SYMS: l df *ABS*          [[#]] bolt-pseudo.o
+
+# CHECK-REGISTER: BOLT-INFO: marking chain.cold.0/1(*2) as a fragment of chain/2(*2)
+
+.file "chain.s"
+        .text
+        .type   chain, @function
+chain:
+        ret
+        .size   chain, .-chain

From 5e9937d1b3ada9c686505c5f2c1e1b054ad9edc2 Mon Sep 17 00:00:00 2001
From: Michael Flanders <flanders.michaelk@gmail.com>
Date: Mon, 29 Apr 2024 18:25:45 -0500
Subject: [PATCH 04/65] [libc][math] Adds entrypoint and tests for
 nearbyintf128,scalbnf128 (#88443)

Closes #84689.

Adding @lntue for review.

I was curious about the implementation of
`round_using_current_rounding_mode` used for the `nearbyint` functions.
It has one of the rounding modes as unreachable
([here](https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/NearestIntegerOperations.h#L243)),
and I was wondering if this was okay for the `nearbyint` functions.

---------

Co-authored-by: Michael Flanders <mkf727@cs.washington.edu>
---
 libc/config/linux/aarch64/entrypoints.txt     |   2 +
 libc/config/linux/riscv/entrypoints.txt       |   2 +
 libc/config/linux/x86_64/entrypoints.txt      |   2 +
 libc/docs/math/index.rst                      |   4 +-
 libc/spec/stdc.td                             |   2 +
 libc/src/math/CMakeLists.txt                  |   2 +
 libc/src/math/generic/CMakeLists.txt          |  32 +++++-
 libc/src/math/generic/nearbyintf128.cpp       |  19 ++++
 libc/src/math/generic/scalbnf128.cpp          |  27 +++++
 libc/src/math/nearbyintf128.h                 |  20 ++++
 libc/src/math/scalbnf128.h                    |  20 ++++
 libc/test/UnitTest/FPMatcher.h                |  21 ++++
 libc/test/src/math/CMakeLists.txt             |  15 +++
 libc/test/src/math/scalbnf128_test.cpp        |  13 +++
 libc/test/src/math/smoke/CMakeLists.txt       |  68 +++++++++++-
 libc/test/src/math/smoke/NearbyIntTest.h      | 100 ++++++++++++++++++
 libc/test/src/math/smoke/nearbyint_test.cpp   |  13 +++
 .../src/math/smoke/nearbyintf128_test.cpp     |  13 +++
 libc/test/src/math/smoke/nearbyintf_test.cpp  |  13 +++
 libc/test/src/math/smoke/nearbyintl_test.cpp  |  13 +++
 libc/test/src/math/smoke/scalbnf128_test.cpp  |  13 +++
 21 files changed, 406 insertions(+), 8 deletions(-)
 create mode 100644 libc/src/math/generic/nearbyintf128.cpp
 create mode 100644 libc/src/math/generic/scalbnf128.cpp
 create mode 100644 libc/src/math/nearbyintf128.h
 create mode 100644 libc/src/math/scalbnf128.h
 create mode 100644 libc/test/src/math/scalbnf128_test.cpp
 create mode 100644 libc/test/src/math/smoke/NearbyIntTest.h
 create mode 100644 libc/test/src/math/smoke/nearbyint_test.cpp
 create mode 100644 libc/test/src/math/smoke/nearbyintf128_test.cpp
 create mode 100644 libc/test/src/math/smoke/nearbyintf_test.cpp
 create mode 100644 libc/test/src/math/smoke/nearbyintl_test.cpp
 create mode 100644 libc/test/src/math/smoke/scalbnf128_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 1ac6bd93000082..eedd9342a09f02 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -527,11 +527,13 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.lroundf128
     libc.src.math.modff128
     libc.src.math.nanf128
+    libc.src.math.nearbyintf128
     libc.src.math.nextafterf128
     libc.src.math.nextdownf128
     libc.src.math.nextupf128
     libc.src.math.rintf128
     libc.src.math.roundf128
+    libc.src.math.scalbnf128
     libc.src.math.sqrtf128
     libc.src.math.truncf128
     libc.src.math.ufromfpf128
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 87e82e5eb9a067..4ddc1fb365e155 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -535,11 +535,13 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.lroundf128
     libc.src.math.modff128
     libc.src.math.nanf128
+    libc.src.math.nearbyintf128
     libc.src.math.nextafterf128
     libc.src.math.nextdownf128
     libc.src.math.nextupf128
     libc.src.math.rintf128
     libc.src.math.roundf128
+    libc.src.math.scalbnf128
     libc.src.math.sqrtf128
     libc.src.math.truncf128
     libc.src.math.ufromfpf128
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index a8e28992766712..2576e4a92e85e2 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -560,12 +560,14 @@ if(LIBC_TYPES_HAS_FLOAT128)
     libc.src.math.lroundf128
     libc.src.math.modff128
     libc.src.math.nanf128
+    libc.src.math.nearbyintf128
     libc.src.math.nextafterf128
     libc.src.math.nextdownf128
     libc.src.math.nextupf128
     libc.src.math.rintf128
     libc.src.math.roundevenf128
     libc.src.math.roundf128
+    libc.src.math.scalbnf128
     libc.src.math.sqrtf128
     libc.src.math.truncf128
     libc.src.math.ufromfpf128
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 7a7b6c9c8db5de..28503e1d13ab5a 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -188,7 +188,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | nan              | |check|          | |check|         | |check|                |                      | |check|                | 7.12.11.2              | F.10.8.2                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| nearbyint        | |check|          | |check|         | |check|                |                      |                        | 7.12.9.3               | F.10.6.3                   |
+| nearbyint        | |check|          | |check|         | |check|                |                      | |check|                | 7.12.9.3               | F.10.6.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | nextafter        | |check|          | |check|         | |check|                |                      | |check|                | 7.12.11.3              | F.10.8.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
@@ -208,7 +208,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | roundeven        | |check|          | |check|         | |check|                |                      | |check|                | 7.12.9.8               | F.10.6.8                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| scalbn           | |check|          | |check|         | |check|                |                      |                        | 7.12.6.19              | F.10.3.19                  |
+| scalbn           | |check|          | |check|         | |check|                |                      | |check|                | 7.12.6.19              | F.10.3.19                  |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | trunc            | |check|          | |check|         | |check|                |                      | |check|                | 7.12.9.9               | F.10.6.9                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 01aa7c70b3b9df..7a9031a0a330bb 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -604,6 +604,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"nearbyint", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"nearbyintf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
           FunctionSpec<"nearbyintl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
+          GuardedFunctionSpec<"nearbyintf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
 
           FunctionSpec<"nextafterf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
           FunctionSpec<"nextafter", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
@@ -647,6 +648,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"scalbn", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
           FunctionSpec<"scalbnf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
           FunctionSpec<"scalbnl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
+          GuardedFunctionSpec<"scalbnf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<IntType>], "LIBC_TYPES_HAS_FLOAT128">,
 
           FunctionSpec<"nanf", RetValSpec<FloatType>, [ArgSpec<ConstCharPtr>]>,
           FunctionSpec<"nan", RetValSpec<DoubleType>, [ArgSpec<ConstCharPtr>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index e8f699fabe3655..c34c58575441d3 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -252,6 +252,7 @@ add_math_entrypoint_object(nanf128)
 add_math_entrypoint_object(nearbyint)
 add_math_entrypoint_object(nearbyintf)
 add_math_entrypoint_object(nearbyintl)
+add_math_entrypoint_object(nearbyintf128)
 
 add_math_entrypoint_object(nextafter)
 add_math_entrypoint_object(nextafterf)
@@ -301,6 +302,7 @@ add_math_entrypoint_object(roundevenf128)
 add_math_entrypoint_object(scalbn)
 add_math_entrypoint_object(scalbnf)
 add_math_entrypoint_object(scalbnl)
+add_math_entrypoint_object(scalbnf128)
 
 add_math_entrypoint_object(sincos)
 add_math_entrypoint_object(sincosf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 574e000b82a8fc..daaf505008ca11 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -704,7 +704,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.nearest_integer_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -716,7 +716,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.nearest_integer_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
 )
 
 add_entrypoint_object(
@@ -728,7 +728,20 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.nearest_integer_operations
   COMPILE_OPTIONS
-    -O2
+    -O3
+)
+
+add_entrypoint_object(
+  nearbyintf128
+  SRCS
+    nearbyintf128.cpp
+  HDRS
+    ../nearbyintf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.nearest_integer_operations
+  COMPILE_OPTIONS
+    -O3
 )
 
 add_object_library(
@@ -2949,6 +2962,19 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  scalbnf128
+  SRCS
+    scalbnf128.cpp
+  HDRS
+    ../scalbnf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.manipulation_functions
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   fmaf
   SRCS
diff --git a/libc/src/math/generic/nearbyintf128.cpp b/libc/src/math/generic/nearbyintf128.cpp
new file mode 100644
index 00000000000000..fca3587f5b58ad
--- /dev/null
+++ b/libc/src/math/generic/nearbyintf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of nearbyintf128 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/nearbyintf128.h"
+#include "src/__support/FPUtil/NearestIntegerOperations.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, nearbyintf128, (float128 x)) {
+  return fputil::round_using_current_rounding_mode(x);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/scalbnf128.cpp b/libc/src/math/generic/scalbnf128.cpp
new file mode 100644
index 00000000000000..be3d29ed27e985
--- /dev/null
+++ b/libc/src/math/generic/scalbnf128.cpp
@@ -0,0 +1,27 @@
+//===-- Implementation of scalbnf128 function -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/scalbnf128.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, scalbnf128, (float128 x, int n)) {
+// TODO: should be switched to use `FLT_RADIX` in hdr/float_macros.h" instead
+// see: https://github.com/llvm/llvm-project/issues/90496
+#if !defined(__FLT_RADIX__)
+#error __FLT_RADIX__ undefined.
+#elif __FLT_RADIX__ != 2
+#error __FLT_RADIX__!=2, unimplemented.
+#else
+  return fputil::ldexp(x, n);
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/nearbyintf128.h b/libc/src/math/nearbyintf128.h
new file mode 100644
index 00000000000000..d12754a4810092
--- /dev/null
+++ b/libc/src/math/nearbyintf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for nearbyintf128 -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_NEARBYINTF128_H
+#define LLVM_LIBC_SRC_MATH_NEARBYINTF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 nearbyintf128(float128 x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_NEARBYINTF128_H
diff --git a/libc/src/math/scalbnf128.h b/libc/src/math/scalbnf128.h
new file mode 100644
index 00000000000000..bd3b560fb7cc42
--- /dev/null
+++ b/libc/src/math/scalbnf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for scalbnf128 --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_SCALBNF128_H
+#define LLVM_LIBC_SRC_MATH_SCALBNF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float128 scalbnf128(float128 x, int n);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_SCALBNF128_H
diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h
index a76e0b8ef6f6f0..c58c322c981e43 100644
--- a/libc/test/UnitTest/FPMatcher.h
+++ b/libc/test/UnitTest/FPMatcher.h
@@ -219,4 +219,25 @@ template <typename T> struct FPTest : public Test {
     }                                                                          \
   } while (0)
 
+#define EXPECT_FP_EQ_ROUNDING_MODE(expected, actual, rounding_mode)            \
+  do {                                                                         \
+    using namespace LIBC_NAMESPACE::fputil::testing;                           \
+    ForceRoundingMode __r((rounding_mode));                                    \
+    if (__r.success) {                                                         \
+      EXPECT_FP_EQ((expected), (actual));                                      \
+    }                                                                          \
+  } while (0)
+
+#define EXPECT_FP_EQ_ROUNDING_NEAREST(expected, actual)                        \
+  EXPECT_FP_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Nearest)
+
+#define EXPECT_FP_EQ_ROUNDING_UPWARD(expected, actual)                         \
+  EXPECT_FP_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Upward)
+
+#define EXPECT_FP_EQ_ROUNDING_DOWNWARD(expected, actual)                       \
+  EXPECT_FP_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::Downward)
+
+#define EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(expected, actual)                    \
+  EXPECT_FP_EQ_ROUNDING_MODE((expected), (actual), RoundingMode::TowardZero)
+
 #endif // LLVM_LIBC_TEST_UNITTEST_FPMATCHER_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 55119868bdaa19..102188c332e408 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1650,6 +1650,21 @@ add_fp_unittest(
     libc.src.__support.FPUtil.normal_float
 )
 
+add_fp_unittest(
+  scalbnf128_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    scalbnf128_test.cpp
+  HDRS
+    ScalbnTest.h
+  DEPENDS
+    libc.src.math.scalbnf128
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.normal_float
+)
+
 add_fp_unittest(
   erff_test
   NEED_MPFR
diff --git a/libc/test/src/math/scalbnf128_test.cpp b/libc/test/src/math/scalbnf128_test.cpp
new file mode 100644
index 00000000000000..dc259de211489e
--- /dev/null
+++ b/libc/test/src/math/scalbnf128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for scalbnf128 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScalbnTest.h"
+
+#include "src/math/scalbnf128.h"
+
+LIST_SCALBN_TESTS(float128, LIBC_NAMESPACE::scalbnf128)
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 22c59c97f6c7fd..112b2985829ca3 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -2188,6 +2188,58 @@ add_fp_unittest(
   UNIT_TEST_ONLY
 )
 
+add_fp_unittest(
+  nearbyint_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nearbyint_test.cpp
+  HDRS
+    NearbyIntTest.h
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.math.nearbyint
+)
+
+add_fp_unittest(
+  nearbyintf_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nearbyintf_test.cpp
+  HDRS
+    NearbyIntTest.h
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.math.nearbyintf
+)
+
+add_fp_unittest(
+  nearbyintl_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nearbyintl_test.cpp
+  HDRS
+    NearbyIntTest.h
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.math.nearbyintl
+)
+
+add_fp_unittest(
+  nearbyintf128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    nearbyintf128_test.cpp
+  HDRS
+    NearbyIntTest.h
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.math.nearbyintf128
+)
+
 add_fp_unittest(
   nextafter_test
   SUITE
@@ -2739,7 +2791,6 @@ add_fp_unittest(
   DEPENDS
     libc.src.math.scalbn
     libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.normal_float
 )
 
 add_fp_unittest(
@@ -2753,7 +2804,6 @@ add_fp_unittest(
   DEPENDS
     libc.src.math.scalbnf
     libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.normal_float
 )
 
 add_fp_unittest(
@@ -2767,7 +2817,19 @@ add_fp_unittest(
   DEPENDS
     libc.src.math.scalbnl
     libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.normal_float
+)
+
+add_fp_unittest(
+  scalbnf128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    scalbnf128_test.cpp
+  HDRS
+    ScalbnTest.h
+  DEPENDS
+    libc.src.math.scalbnf128
+    libc.src.__support.FPUtil.fp_bits
 )
 
 add_fp_unittest(
diff --git a/libc/test/src/math/smoke/NearbyIntTest.h b/libc/test/src/math/smoke/NearbyIntTest.h
new file mode 100644
index 00000000000000..0051ff9447a7ed
--- /dev/null
+++ b/libc/test/src/math/smoke/NearbyIntTest.h
@@ -0,0 +1,100 @@
+//===-- Utility class to test different flavors of nearbyint ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_NEARBYINTTEST_H
+#define LLVM_LIBC_TEST_SRC_MATH_NEARBYINTTEST_H
+
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO,
+                                          FE_TONEAREST};
+
+template <typename T>
+class NearbyIntTestTemplate : public LIBC_NAMESPACE::testing::Test {
+
+  DECLARE_SPECIAL_CONSTANTS(T)
+
+public:
+  typedef T (*NearbyIntFunc)(T);
+
+  void testNaN(NearbyIntFunc func) {
+    EXPECT_FP_EQ_ALL_ROUNDING(func(aNaN), aNaN);
+  }
+
+  void testInfinities(NearbyIntFunc func) {
+    EXPECT_FP_EQ_ALL_ROUNDING(func(inf), inf);
+    EXPECT_FP_EQ_ALL_ROUNDING(func(neg_inf), neg_inf);
+  }
+
+  void testZeroes(NearbyIntFunc func) {
+    EXPECT_FP_EQ_ALL_ROUNDING(func(zero), zero);
+    EXPECT_FP_EQ_ALL_ROUNDING(func(neg_zero), neg_zero);
+  }
+
+  void testIntegers(NearbyIntFunc func) {
+    EXPECT_FP_EQ_ALL_ROUNDING(func(T(1.0)), T(1.0));
+    EXPECT_FP_EQ_ALL_ROUNDING(func(T(-1.0)), T(-1.0));
+
+    EXPECT_FP_EQ_ALL_ROUNDING(func(T(1234.0)), T(1234.0));
+    EXPECT_FP_EQ_ALL_ROUNDING(func(T(-1234.0)), T(-1234.0));
+
+    EXPECT_FP_EQ_ALL_ROUNDING(func(T(10.0)), T(10.0));
+    EXPECT_FP_EQ_ALL_ROUNDING(func(T(-10.0)), T(-10.0));
+
+    FPBits ints_start(T(0));
+    ints_start.set_biased_exponent(FPBits::SIG_LEN + FPBits::EXP_BIAS);
+    T expected = ints_start.get_val();
+    EXPECT_FP_EQ_ALL_ROUNDING(func(expected), expected);
+  }
+
+  void testSubnormalToNearest(NearbyIntFunc func) {
+    ASSERT_FP_EQ(func(min_denormal), zero);
+    ASSERT_FP_EQ(func(-min_denormal), neg_zero);
+  }
+
+  void testSubnormalTowardZero(NearbyIntFunc func) {
+    EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(func(min_denormal), zero);
+    EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(func(-min_denormal), neg_zero);
+  }
+
+  void testSubnormalToPosInf(NearbyIntFunc func) {
+    EXPECT_FP_EQ_ROUNDING_UPWARD(func(min_denormal), FPBits::one().get_val());
+    EXPECT_FP_EQ_ROUNDING_UPWARD(func(-min_denormal), neg_zero);
+  }
+
+  void testSubnormalToNegInf(NearbyIntFunc func) {
+    T negative_one = FPBits::one(Sign::NEG).get_val();
+    EXPECT_FP_EQ_ROUNDING_DOWNWARD(func(min_denormal), zero);
+    EXPECT_FP_EQ_ROUNDING_DOWNWARD(func(-min_denormal), negative_one);
+  }
+};
+
+#define LIST_NEARBYINT_TESTS(T, func)                                          \
+  using LlvmLibcNearbyIntTest = NearbyIntTestTemplate<T>;                      \
+  TEST_F(LlvmLibcNearbyIntTest, TestNaN) { testNaN(&func); }                   \
+  TEST_F(LlvmLibcNearbyIntTest, TestInfinities) { testInfinities(&func); }     \
+  TEST_F(LlvmLibcNearbyIntTest, TestZeroes) { testZeroes(&func); }             \
+  TEST_F(LlvmLibcNearbyIntTest, TestIntegers) { testIntegers(&func); }         \
+  TEST_F(LlvmLibcNearbyIntTest, TestSubnormalToNearest) {                      \
+    testSubnormalToNearest(&func);                                             \
+  }                                                                            \
+  TEST_F(LlvmLibcNearbyIntTest, TestSubnormalTowardZero) {                     \
+    testSubnormalTowardZero(&func);                                            \
+  }                                                                            \
+  TEST_F(LlvmLibcNearbyIntTest, TestSubnormalToPosInf) {                       \
+    testSubnormalToPosInf(&func);                                              \
+  }                                                                            \
+  TEST_F(LlvmLibcNearbyIntTest, TestSubnormalToNegInf) {                       \
+    testSubnormalToNegInf(&func);                                              \
+  }
+
+#endif // LLVM_LIBC_TEST_SRC_MATH_NEARBYINTTEST_H
diff --git a/libc/test/src/math/smoke/nearbyint_test.cpp b/libc/test/src/math/smoke/nearbyint_test.cpp
new file mode 100644
index 00000000000000..11a5c3372e73ea
--- /dev/null
+++ b/libc/test/src/math/smoke/nearbyint_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nearbyint -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NearbyIntTest.h"
+
+#include "src/math/nearbyint.h"
+
+LIST_NEARBYINT_TESTS(double, LIBC_NAMESPACE::nearbyint)
diff --git a/libc/test/src/math/smoke/nearbyintf128_test.cpp b/libc/test/src/math/smoke/nearbyintf128_test.cpp
new file mode 100644
index 00000000000000..98fbb2858fafa6
--- /dev/null
+++ b/libc/test/src/math/smoke/nearbyintf128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nearbyintf128 ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NearbyIntTest.h"
+
+#include "src/math/nearbyintf128.h"
+
+LIST_NEARBYINT_TESTS(float128, LIBC_NAMESPACE::nearbyintf128)
diff --git a/libc/test/src/math/smoke/nearbyintf_test.cpp b/libc/test/src/math/smoke/nearbyintf_test.cpp
new file mode 100644
index 00000000000000..fd26153cfffb94
--- /dev/null
+++ b/libc/test/src/math/smoke/nearbyintf_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nearbyintf ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NearbyIntTest.h"
+
+#include "src/math/nearbyintf.h"
+
+LIST_NEARBYINT_TESTS(float, LIBC_NAMESPACE::nearbyintf)
diff --git a/libc/test/src/math/smoke/nearbyintl_test.cpp b/libc/test/src/math/smoke/nearbyintl_test.cpp
new file mode 100644
index 00000000000000..a6d81a1439e17c
--- /dev/null
+++ b/libc/test/src/math/smoke/nearbyintl_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for nearbyintl ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NearbyIntTest.h"
+
+#include "src/math/nearbyintl.h"
+
+LIST_NEARBYINT_TESTS(long double, LIBC_NAMESPACE::nearbyintl)
diff --git a/libc/test/src/math/smoke/scalbnf128_test.cpp b/libc/test/src/math/smoke/scalbnf128_test.cpp
new file mode 100644
index 00000000000000..dc259de211489e
--- /dev/null
+++ b/libc/test/src/math/smoke/scalbnf128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for scalbnf128 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScalbnTest.h"
+
+#include "src/math/scalbnf128.h"
+
+LIST_SCALBN_TESTS(float128, LIBC_NAMESPACE::scalbnf128)

From ae7ce1c6e728a262049c998c667b79165b285bd8 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Mon, 29 Apr 2024 16:30:57 -0700
Subject: [PATCH 05/65] [CodeGen] Remove extraneous ArrayRef (NFC) (#90423)

We don't need to explicitly create an instance of ArrayRef here
because getIndexedOffsetInType takes ArrayRef, and ArrayRef can be
implicitly constructed from a C array.
---
 llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index f2d5c3c867c2dd..e5f164b182723f 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -890,7 +890,7 @@ struct VectorInfo {
           ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0),
           ConstantInt::get(Type::getInt32Ty(LI->getContext()), i),
       };
-      int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, ArrayRef(Idx, 2));
+      int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, Idx);
       Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);
     }
 

From 1cb33713910501c6352d0eb2a15b7a15e6e18695 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Mon, 29 Apr 2024 23:50:18 +0000
Subject: [PATCH 06/65] Ensure test writes objects to test temp dir

---
 clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp b/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp
index 293aef6781677f..790899486ec9d1 100644
--- a/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp
+++ b/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp
@@ -2,10 +2,10 @@
 // This test is adapted from coro-elide.cpp and splits functions into two files.
 //
 // RUN: split-file %s %t
-// RUN: %clang --target=x86_64-linux -std=c++20 -O2 -flto=thin -I %S -c %t/coro-elide-callee.cpp -o coro-elide-callee.o
-// RUN: %clang --target=x86_64-linux -std=c++20 -O2 -flto=thin -I %S -c %t/coro-elide-caller.cpp -o coro-elide-caller.o
-// RUN: llvm-lto -thinlto coro-elide-callee.o coro-elide-caller.o -o summary
-// RUN: %clang_cc1 -O2 -x ir coro-elide-caller.o -fthinlto-index=summary.thinlto.bc -emit-llvm -o - | FileCheck %s
+// RUN: %clang --target=x86_64-linux -std=c++20 -O2 -flto=thin -I %S -c %t/coro-elide-callee.cpp -o %t/coro-elide-callee.o
+// RUN: %clang --target=x86_64-linux -std=c++20 -O2 -flto=thin -I %S -c %t/coro-elide-caller.cpp -o %t/coro-elide-caller.o
+// RUN: llvm-lto -thinlto %t/coro-elide-callee.o %t/coro-elide-caller.o -o summary
+// RUN: %clang_cc1 -O2 -x ir %t/coro-elide-caller.o -fthinlto-index=summary.thinlto.bc -emit-llvm -o - | FileCheck %s
 
 //--- coro-elide-task.h
 #pragma once

From 6566ffdf8a543f50b75e9b3c66d771a3a9eb1560 Mon Sep 17 00:00:00 2001
From: Kevin Frei <kevinfrei@users.noreply.github.com>
Date: Mon, 29 Apr 2024 17:00:19 -0700
Subject: [PATCH 07/65] Clean up the GSym error aggregation code, and pass the
 aggregator by reference (#89688)

There was a problem with `llvm-gsymutil`s error aggregation code not
properly collecting aggregate errors. The was that the output aggregator
collecting errors from other threads wasn't being passed by reference,
so it was merging them into a copy of the app-wide output aggregator.

While I was at it, I added a better comment above the "Merge" code and
made it a bit more efficient, after learning more details about
`emplace` vs. `insert` or `operator[]` on `std::map`'s.

Co-authored-by: Kevin Frei <freik@meta.com>
---
 llvm/include/llvm/DebugInfo/GSYM/OutputAggregator.h | 9 ++++-----
 llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp        | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/OutputAggregator.h b/llvm/include/llvm/DebugInfo/GSYM/OutputAggregator.h
index 8deea3bff1ed76..35ef0a8bc89085 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/OutputAggregator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/OutputAggregator.h
@@ -57,13 +57,12 @@ class OutputAggregator {
   }
 
   // For multi-threaded usage, we can collect stuff in another aggregator,
-  // then merge it in here
+  // then merge it in here. Note that this is *not* thread safe. It is up to
+  // the caller to ensure that this is only called from one thread at a time.
   void Merge(const OutputAggregator &other) {
     for (auto &&[name, count] : other.Aggregation) {
-      auto it = Aggregation.find(name);
-      if (it == Aggregation.end())
-        Aggregation.emplace(name, count);
-      else
+      auto [it, inserted] = Aggregation.emplace(name, count);
+      if (!inserted)
         it->second += count;
     }
   }
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index ff6b560d11726b..601686fdd3dd51 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -612,7 +612,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
       DWARFDie Die = getDie(*CU);
       if (Die) {
         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
-        pool.async([this, CUI, &LogMutex, Out, Die]() mutable {
+        pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
           std::string storage;
           raw_string_ostream StrStream(storage);
           OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);

From ce3485a0cd12b45c725f008a7836e71a1d72df49 Mon Sep 17 00:00:00 2001
From: Max Winkler <max.enrico.winkler@gmail.com>
Date: Mon, 29 Apr 2024 20:29:34 -0400
Subject: [PATCH 08/65] [llvm][GlobalOpt] Remove empty atexit
 destructors/handlers (#88836)

https://godbolt.org/z/frjhqMKqc for an example.

Removal of allocations due to empty `__cxa_atexit` destructor calls is
done by the following globalopt pass.
This pass currently does not look for `atexit` handlers generated for
platforms that do not use `__cxa_atexit`.
By default Win32 and AIX use `atexit`.

I don't see an easy way to only remove `atexit` calls that the compiler
generated without looking at the generated mangled name of the atexit
handler that is being registered.
However we can easily remove all `atexit` calls that register empty
handlers since it is trivial to ensure the removed call still returns
`0` which is the value for success.
---
 .../llvm/Analysis/TargetLibraryInfo.def       |  5 ++
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         | 45 +++++++++-------
 llvm/test/Transforms/GlobalOpt/atexit-dtor.ll | 54 +++++++++++++++++++
 .../tools/llvm-tli-checker/ps4-tli-check.yaml | 12 +++--
 .../Analysis/TargetLibraryInfoTest.cpp        |  2 +
 5 files changed, 96 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/Transforms/GlobalOpt/atexit-dtor.ll

diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 37221eb9e47115..717693a7cf63c1 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -471,6 +471,11 @@ TLI_DEFINE_ENUM_INTERNAL(cxa_atexit)
 TLI_DEFINE_STRING_INTERNAL("__cxa_atexit")
 TLI_DEFINE_SIG_INTERNAL(Int, Ptr, Ptr, Ptr)
 
+/// int atexit(void (*f)(void));
+TLI_DEFINE_ENUM_INTERNAL(atexit)
+TLI_DEFINE_STRING_INTERNAL("atexit")
+TLI_DEFINE_SIG_INTERNAL(Int, Ptr)
+
 /// void __cxa_guard_abort(guard_t *guard);
 /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi.
 TLI_DEFINE_ENUM_INTERNAL(cxa_guard_abort)
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index fbb83e787f6327..c8c835115a992a 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -87,6 +87,7 @@ STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
+STATISTIC(NumAtExitRemoved, "Number of atexit handlers removed");
 STATISTIC(NumInternalFunc, "Number of internal functions");
 STATISTIC(NumColdCC, "Number of functions marked coldcc");
 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
@@ -2328,18 +2329,19 @@ OptimizeGlobalAliases(Module &M,
 }
 
 static Function *
-FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+FindAtExitLibFunc(Module &M,
+                  function_ref<TargetLibraryInfo &(Function &)> GetTLI,
+                  LibFunc Func) {
   // Hack to get a default TLI before we have actual Function.
   auto FuncIter = M.begin();
   if (FuncIter == M.end())
     return nullptr;
   auto *TLI = &GetTLI(*FuncIter);
 
-  LibFunc F = LibFunc_cxa_atexit;
-  if (!TLI->has(F))
+  if (!TLI->has(Func))
     return nullptr;
 
-  Function *Fn = M.getFunction(TLI->getName(F));
+  Function *Fn = M.getFunction(TLI->getName(Func));
   if (!Fn)
     return nullptr;
 
@@ -2347,17 +2349,18 @@ FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
   TLI = &GetTLI(*Fn);
 
   // Make sure that the function has the correct prototype.
-  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
+  LibFunc F;
+  if (!TLI->getLibFunc(*Fn, F) || F != Func)
     return nullptr;
 
   return Fn;
 }
 
-/// Returns whether the given function is an empty C++ destructor and can
-/// therefore be eliminated.
-/// Note that we assume that other optimization passes have already simplified
-/// the code so we simply check for 'ret'.
-static bool cxxDtorIsEmpty(const Function &Fn) {
+/// Returns whether the given function is an empty C++ destructor or atexit
+/// handler and can therefore be eliminated. Note that we assume that other
+/// optimization passes have already simplified the code so we simply check for
+/// 'ret'.
+static bool IsEmptyAtExitFunction(const Function &Fn) {
   // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
   // nounwind, but that doesn't seem worth doing.
   if (Fn.isDeclaration())
@@ -2373,7 +2376,7 @@ static bool cxxDtorIsEmpty(const Function &Fn) {
   return false;
 }
 
-static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
+static bool OptimizeEmptyGlobalAtExitDtors(Function *CXAAtExitFn, bool isCXX) {
   /// Itanium C++ ABI p3.3.5:
   ///
   ///   After constructing a global (or local static) object, that will require
@@ -2386,8 +2389,8 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
   ///   registered before this one. It returns zero if registration is
   ///   successful, nonzero on failure.
 
-  // This pass will look for calls to __cxa_atexit where the function is trivial
-  // and remove them.
+  // This pass will look for calls to __cxa_atexit or atexit where the function
+  // is trivial and remove them.
   bool Changed = false;
 
   for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
@@ -2400,14 +2403,17 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
 
     Function *DtorFn =
       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
-    if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
+    if (!DtorFn || !IsEmptyAtExitFunction(*DtorFn))
       continue;
 
     // Just remove the call.
     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
     CI->eraseFromParent();
 
-    ++NumCXXDtorsRemoved;
+    if (isCXX)
+      ++NumCXXDtorsRemoved;
+    else
+      ++NumAtExitRemoved;
 
     Changed |= true;
   }
@@ -2525,9 +2531,12 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
 
     // Try to remove trivial global destructors if they are not removed
     // already.
-    Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
-    if (CXAAtExitFn)
-      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
+    if (Function *CXAAtExitFn =
+            FindAtExitLibFunc(M, GetTLI, LibFunc_cxa_atexit))
+      LocalChange |= OptimizeEmptyGlobalAtExitDtors(CXAAtExitFn, true);
+
+    if (Function *AtExitFn = FindAtExitLibFunc(M, GetTLI, LibFunc_atexit))
+      LocalChange |= OptimizeEmptyGlobalAtExitDtors(AtExitFn, false);
 
     // Optimize IFuncs whose callee's are statically known.
     LocalChange |= OptimizeStaticIFuncs(M);
diff --git a/llvm/test/Transforms/GlobalOpt/atexit-dtor.ll b/llvm/test/Transforms/GlobalOpt/atexit-dtor.ll
new file mode 100644
index 00000000000000..bf78fd3d3ef49b
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/atexit-dtor.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -S -passes=globalopt | FileCheck %s
+
+declare dso_local i32 @atexit(ptr)
+
+define dso_local void @empty_atexit_handler() {
+; CHECK-LABEL: define dso_local void @empty_atexit_handler() local_unnamed_addr {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+; Check that `atexit` is removed if the handler is empty.
+; Check that a removed `atexit` call returns `0` which is the value that denotes success.
+define dso_local noundef i32 @register_atexit_handler() {
+; CHECK-LABEL: define dso_local noundef i32 @register_atexit_handler() local_unnamed_addr {
+; CHECK-NEXT:    ret i32 0
+;
+  %1 = call i32 @atexit(ptr @empty_atexit_handler)
+  ret i32 %1
+}
+
+declare dso_local void @declared_atexit_handler()
+
+; Check that an atexit handler with only a declaration is not removed.
+define dso_local noundef i32 @register_declared_atexit_handler() {
+; CHECK-LABEL: define dso_local noundef i32 @register_declared_atexit_handler() local_unnamed_addr {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @atexit(ptr @declared_atexit_handler)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %1 = call i32 @atexit(ptr @declared_atexit_handler)
+  ret i32 %1
+}
+
+declare dso_local void @external_exit_func()
+
+define dso_local void @nonempty_atexit_handler() {
+; CHECK-LABEL: define dso_local void @nonempty_atexit_handler() {
+; CHECK-NEXT:    call void @external_exit_func()
+; CHECK-NEXT:    ret void
+;
+  call void @external_exit_func()
+  ret void
+}
+
+; Check that an atexit handler that consists of any instructions other than `ret` is considered nonempty and not removed.
+define dso_local noundef i32 @register_nonempty_atexit_handler() {
+; CHECK-LABEL: define dso_local noundef i32 @register_nonempty_atexit_handler() local_unnamed_addr {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @atexit(ptr @nonempty_atexit_handler)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %1 = call i32 @atexit(ptr @nonempty_atexit_handler)
+  ret i32 %1
+}
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 46f6a2d0a554ac..95c23007b1a05a 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,21 +34,21 @@
 #
 # CHECK: << Total TLI yes SDK no:  8
 # CHECK: >> Total TLI no  SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 238
+# CHECK: == Total TLI yes SDK yes: 239
 #
 # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
 # WRONG_DETAIL: >> TLI no  SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
 # WRONG_DETAIL-COUNT-8: << TLI yes SDK no : {{.*}}__hot_cold_t
 # WRONG_SUMMARY: << Total TLI yes SDK no:  9{{$}}
 # WRONG_SUMMARY: >> Total TLI no  SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 237
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 238
 #
 ## The -COUNT suffix doesn't care if there are too many matches, so check
 ## the exact count first; the two directives should add up to that.
 ## Yes, this means additions to TLI will fail this test, but the argument
 ## to -COUNT can't be an expression.
-# AVAIL: TLI knows 479 symbols, 246 available
-# AVAIL-COUNT-246: {{^}} available
+# AVAIL: TLI knows 480 symbols, 247 available
+# AVAIL-COUNT-247: {{^}} available
 # AVAIL-NOT:       {{^}} available
 # UNAVAIL-COUNT-233: not available
 # UNAVAIL-NOT:       not available
@@ -263,6 +263,10 @@ DynamicSymbols:
     Type:            STT_FUNC
     Section:         .text
     Binding:         STB_GLOBAL
+  - Name:            atexit
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
   - Name:            atof
     Type:            STT_FUNC
     Section:         .text
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 8e3fe3b44a84a9..1fe94e2aae059e 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -495,6 +495,8 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare i32 @__cxa_guard_acquire(%struct*)\n"
       "declare void @__cxa_guard_release(%struct*)\n"
 
+      "declare i32 @atexit(void ()*)\n"
+
       "declare i32 @__nvvm_reflect(i8*)\n"
 
       "declare i8* @__memcpy_chk(i8*, i8*, i64, i64)\n"

From 9a1386e5fabb0d2741f2f7c873342eb86e350e9e Mon Sep 17 00:00:00 2001
From: Andreas Jonson <andjo403@hotmail.com>
Date: Tue, 30 Apr 2024 02:42:31 +0200
Subject: [PATCH 09/65] [NFC] Remove method from FoldingSet that already
 existed in APInt. (#90486)

Noticed that there already was a function in APInt that updated a
FoldingSet so there was no need for me to add it in
https://github.com/llvm/llvm-project/pull/84617.
---
 llvm/include/llvm/ADT/FoldingSet.h | 8 --------
 llvm/lib/IR/AttributeImpl.h        | 4 ++--
 llvm/lib/IR/Attributes.cpp         | 4 ++--
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h
index 28a84d96588383..f82eabd5044b22 100644
--- a/llvm/include/llvm/ADT/FoldingSet.h
+++ b/llvm/include/llvm/ADT/FoldingSet.h
@@ -16,7 +16,6 @@
 #ifndef LLVM_ADT_FOLDINGSET_H
 #define LLVM_ADT_FOLDINGSET_H
 
-#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallVector.h"
@@ -355,13 +354,6 @@ class FoldingSetNodeID {
     AddInteger(unsigned(I));
     AddInteger(unsigned(I >> 32));
   }
-  void AddInteger(const APInt &Int) {
-    AddInteger(Int.getBitWidth());
-    const auto *Parts = Int.getRawData();
-    for (int i = 0, N = Int.getNumWords(); i < N; ++i) {
-      AddInteger(Parts[i]);
-    }
-  }
 
   void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); }
   void AddString(StringRef String);
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 9a6427bbc3d557..58dc14588f41a7 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -121,8 +121,8 @@ class AttributeImpl : public FoldingSetNode {
   static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
                       const ConstantRange &CR) {
     ID.AddInteger(Kind);
-    ID.AddInteger(CR.getLower());
-    ID.AddInteger(CR.getUpper());
+    CR.getLower().Profile(ID);
+    CR.getUpper().Profile(ID);
   }
 };
 
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 9c48a481de1ff6..9f27f176b104ec 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -173,8 +173,8 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
   LLVMContextImpl *pImpl = Context.pImpl;
   FoldingSetNodeID ID;
   ID.AddInteger(Kind);
-  ID.AddInteger(CR.getLower());
-  ID.AddInteger(CR.getUpper());
+  CR.getLower().Profile(ID);
+  CR.getUpper().Profile(ID);
 
   void *InsertPoint;
   AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);

From 65ee8f10b2017349b7742843fbe4accb172736e9 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Mon, 29 Apr 2024 18:06:07 -0700
Subject: [PATCH 10/65] [mlir][sparse] fold explicit value during
 sparsification (#90530)

This ensures the explicit value is generated (and not a load into the
values array). Note that actually not storing values array at all is
still TBD, this is just the very first step.
---
 .../SparseTensor/IR/SparseTensorType.h        |  8 +-
 .../Transforms/Sparsification.cpp             | 10 ++-
 .../Transforms/Utils/CodegenUtils.h           | 10 +++
 .../SparseTensor/sparse_matmul_one.mlir       | 75 +++++++++++++++++++
 4 files changed, 99 insertions(+), 4 deletions(-)
 create mode 100755 mlir/test/Dialect/SparseTensor/sparse_matmul_one.mlir

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
index 34d99913fbd51b..ea3d8013b45671 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
@@ -344,10 +344,14 @@ class SparseTensorType {
   unsigned getPosWidth() const { return enc ? enc.getPosWidth() : 0; }
 
   /// Returns the explicit value, defaulting to null Attribute for unset.
-  Attribute getExplicitVal() const { return enc.getExplicitVal(); }
+  Attribute getExplicitVal() const {
+    return enc ? enc.getExplicitVal() : nullptr;
+  }
 
   /// Returns the implicit value, defaulting to null Attribute for 0.
-  Attribute getImplicitVal() const { return enc.getImplicitVal(); }
+  Attribute getImplicitVal() const {
+    return enc ? enc.getImplicitVal() : nullptr;
+  }
 
   /// Returns the coordinate-overhead MLIR type, defaulting to `IndexType`.
   Type getCrdType() const { return enc.getCrdElemType(); }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 0a9bb40b458d68..0c8e431d8c9964 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -498,9 +498,15 @@ static Value genTensorLoad(CodegenEnv &env, OpBuilder &builder, ExprId exp) {
   Value val = env.exp(exp).val;
   if (val)
     return val;
-  // Load during insertion.
+  // Get tensor operand.
   linalg::GenericOp op = env.op();
+  Location loc = op.getLoc();
   OpOperand *t = &op->getOpOperand(env.exp(exp).tensor);
+  // Fold binary-valued tensor into explicit value.
+  const auto stt = getSparseTensorType(t->get());
+  if (auto explVal = stt.getExplicitVal())
+    return genValFromAttr(builder, loc, explVal);
+  // Load during insertion.
   if (env.isSparseOutput(t)) {
     if (env.isCustomReduc())
       return genInsertionLoadReduce(env, builder, t);
@@ -509,7 +515,7 @@ static Value genTensorLoad(CodegenEnv &env, OpBuilder &builder, ExprId exp) {
   // Actual load.
   SmallVector<Value> args;
   Value ptr = genSubscript(env, builder, t, args);
-  return builder.create<memref::LoadOp>(op.getLoc(), ptr, args);
+  return builder.create<memref::LoadOp>(loc, ptr, args);
 }
 
 /// Generates a store on a dense or sparse tensor.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.h
index ce5831d999e9a4..cf3c35f5fa4c78 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.h
@@ -399,6 +399,16 @@ inline Value constantLevelTypeEncoding(OpBuilder &builder, Location loc,
   return constantI64(builder, loc, static_cast<uint64_t>(lt));
 }
 
+// Generates a constant from a validated value carrying attribute.
+inline Value genValFromAttr(OpBuilder &builder, Location loc, Attribute attr) {
+  if (auto arrayAttr = dyn_cast<ArrayAttr>(attr)) {
+    Type tp = cast<TypedAttr>(arrayAttr[0]).getType();
+    return builder.create<complex::ConstantOp>(loc, tp, arrayAttr);
+  }
+  return builder.create<arith::ConstantOp>(loc, cast<TypedAttr>(attr));
+}
+
+// TODO: is this at the right place?
 inline bool isZeroRankedTensorOrScalar(Type type) {
   auto rtp = dyn_cast<RankedTensorType>(type);
   return !rtp || rtp.getRank() == 0;
diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_one.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_one.mlir
new file mode 100755
index 00000000000000..82f3147d3206bd
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_matmul_one.mlir
@@ -0,0 +1,75 @@
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN:             --sparsification-and-bufferization | FileCheck %s
+
+#CSR_ones_complex = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed)
+// explicitVal = (1.0, 0.0) : complex<f32>,
+// implicitVal = (0.0, 0.0) : complex<f32>
+}>
+
+#CSR_ones_fp = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  explicitVal = 1.0 : f32,
+  implicitVal = 0.0 : f32
+}>
+
+#CSR_ones_int = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed),
+  explicitVal = 1 : i32,
+  implicitVal = 0 : i32
+}>
+
+// CHECK-LABEL:   func.func @matmul_complex
+//
+// TODO: make this work
+//
+func.func @matmul_complex(%a: tensor<10x20xcomplex<f32>>,
+                          %b: tensor<20x30xcomplex<f32>, #CSR_ones_complex>,
+                          %c: tensor<10x30xcomplex<f32>>) -> tensor<10x30xcomplex<f32>> {
+  %0 = linalg.matmul
+    ins(%a, %b: tensor<10x20xcomplex<f32>>, tensor<20x30xcomplex<f32>,#CSR_ones_complex>)
+    outs(%c: tensor<10x30xcomplex<f32>>) -> tensor<10x30xcomplex<f32>>
+  return %0 : tensor<10x30xcomplex<f32>>
+}
+
+// CHECK-LABEL:   func.func @matmul_fp
+// CHECK:         scf.for
+// CHECK:           scf.for
+// CHECK:             %[[X:.*]] = memref.load
+// CHECK:             scf.for
+// CHECK:               %[[I:.*]] = memref.load
+// CHECK:               %[[Y:.*]] = memref.load
+// CHECK:               %[[M:.*]] = arith.addf %[[Y]], %[[X]] : f32
+// CHECK:               memref.store %[[M]]
+// CHECK:             }
+// CHECK:           }
+// CHECK:         }
+func.func @matmul_fp(%a: tensor<10x20xf32>,
+                     %b: tensor<20x30xf32, #CSR_ones_fp>,
+                     %c: tensor<10x30xf32>) -> tensor<10x30xf32> {
+  %0 = linalg.matmul
+    ins(%a, %b: tensor<10x20xf32>, tensor<20x30xf32,#CSR_ones_fp>)
+    outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32>
+  return %0 : tensor<10x30xf32>
+}
+
+// CHECK-LABEL:   func.func @matmul_int
+// CHECK:         scf.for
+// CHECK:           scf.for
+// CHECK:             %[[X:.*]] = memref.load
+// CHECK:             scf.for
+// CHECK:               %[[I:.*]] = memref.load
+// CHECK:               %[[Y:.*]] = memref.load
+// CHECK:               %[[M:.*]] = arith.addi %[[Y]], %[[X]] : i32
+// CHECK:               memref.store %[[M]]
+// CHECK:             }
+// CHECK:           }
+// CHECK:         }
+func.func @matmul_int(%a: tensor<10x20xi32>,
+                      %b: tensor<20x30xi32, #CSR_ones_int>,
+                      %c: tensor<10x30xi32>) -> tensor<10x30xi32> {
+  %0 = linalg.matmul
+    ins(%a, %b: tensor<10x20xi32>, tensor<20x30xi32,#CSR_ones_int>)
+    outs(%c: tensor<10x30xi32>) -> tensor<10x30xi32>
+  return %0 : tensor<10x30xi32>
+}

From b1867e18c346e9621e14270bea2d1acb7d2a9ce0 Mon Sep 17 00:00:00 2001
From: Dan Liew <delcypher@gmail.com>
Date: Mon, 29 Apr 2024 18:37:47 -0700
Subject: [PATCH 11/65] [Attributes] Support Attributes being declared as
 supporting an experimental late parsing mode "extension" (#88596)

This patch changes the `LateParsed` field of `Attr` in `Attr.td` to be
an instantiation of the new `LateAttrParseKind` class. The instation can be one of the following:

* `LateAttrParsingNever` - Corresponds with the false value of `LateParsed` prior to this patch (the default for an attribute).
* `LateAttrParseStandard` - Corresponds with the true value of `LateParsed` prior to this patch.
* `LateAttrParseExperimentalExt` - A new mode described below.

`LateAttrParseExperimentalExt` is an experimental extension to
`LateAttrParseStandard`. Essentially this allows
`Parser::ParseGNUAttributes(...)` to distinguish between these cases:

1. Only `LateAttrParseExperimentalExt` attributes should be late parsed.
2. Both `LateAttrParseExperimentalExt` and `LateAttrParseStandard`
  attributes should be late parsed.

Callers (and indirect callers) of `Parser::ParseGNUAttributes(...)`
indicate the desired behavior by setting a flag in the
`LateParsedAttrList` object that is passed to the function.

In addition to the above, a new driver and frontend flag
(`-fexperimental-late-parse-attributes`) with a corresponding LangOpt
(`ExperimentalLateParseAttributes`) is added that changes how
`LateAttrParseExperimentalExt` attributes are parsed.

* When the flag is disabled (default), in cases where only
  `LateAttrParsingExperimentalOnly` late parsing is requested, the
  attribute will be parsed immediately (i.e. **NOT** late parsed). This
  allows the attribute to act just like a `LateAttrParseStandard`
  attribute when the flag is disabled.

* When the flag is enabled, in cases where only
  `LateAttrParsingExperimentalOnly` late parsing is requested, the
  attribute will be late parsed.

The motivation behind this change is to allow the new `counted_by`
attribute (part of `-fbounds-safety`) to support late parsing but
**only** when `-fexperimental-late-parse-attributes` is enabled. This
attribute needs to support late parsing to allow it to refer to fields
later in a struct definition (or function parameters declared later).
However, there isn't a precedent for supporting late attribute parsing
in C so this flag allows the new behavior to exist in Clang but not be
on by default. This behavior was requested as part of the
`-fbounds-safety` RFC process
(https://discourse.llvm.org/t/rfc-enforcing-bounds-safety-in-c-fbounds-safety/70854/68).

This patch doesn't introduce any uses of `LateAttrParseExperimentalExt`.
This will be added for the `counted_by` attribute in a future patch
(https://github.com/llvm/llvm-project/pull/87596). A consequence is the
new behavior added in this patch is not yet testable. Hence, the lack of
tests covering the new behavior.

rdar://125400257
---
 clang/docs/ReleaseNotes.rst                   |   4 +
 clang/include/clang/Basic/Attr.td             |  79 +++++++---
 clang/include/clang/Basic/LangOptions.def     |   1 +
 clang/include/clang/Driver/Options.td         |   7 +
 clang/include/clang/Parse/Parser.h            |  13 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   3 +
 clang/lib/Parse/ParseDecl.cpp                 |  40 ++++-
 .../experimental-late-parse-attributes.c      |  12 ++
 clang/utils/TableGen/ClangAttrEmitter.cpp     | 137 ++++++++++++++++--
 9 files changed, 254 insertions(+), 42 deletions(-)
 create mode 100644 clang/test/Driver/experimental-late-parse-attributes.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 676f38a8e94c81..4c0fe5bcf6b122 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -256,6 +256,10 @@ New Compiler Flags
 - ``-fexperimental-modules-reduced-bmi`` enables the Reduced BMI for C++20 named modules.
   See the document of standard C++ modules for details.
 
+- ``-fexperimental-late-parse-attributes`` enables an experimental feature to
+  allow late parsing certain attributes in specific contexts where they would
+  not normally be late parsed.
+
 Deprecated Compiler Flags
 -------------------------
 
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 97e06fe7d2e6aa..0225598cbbe8ad 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -592,6 +592,49 @@ class AttrSubjectMatcherAggregateRule<AttrSubject subject> {
 
 def SubjectMatcherForNamed : AttrSubjectMatcherAggregateRule<Named>;
 
+// Enumeration specifying what kind of behavior should be used for late
+// parsing of attributes.
+class LateAttrParseKind <int val> {
+  int Kind = val;
+}
+
+// Never late parsed
+def LateAttrParseNever : LateAttrParseKind<0>;
+
+// Standard late attribute parsing
+//
+// This is language dependent. For example:
+//
+// * For C++ enables late parsing of a declaration attributes
+// * For C does not enable late parsing of attributes
+//
+def LateAttrParseStandard : LateAttrParseKind<1>;
+
+// Experimental extension to standard late attribute parsing
+//
+// This extension behaves like `LateAttrParseStandard` but allows
+// late parsing attributes in more contexts.
+//
+// In contexts where `LateAttrParseStandard` attributes are late
+// parsed, `LateAttrParseExperimentalExt` attributes will also
+// be late parsed.
+//
+// In contexts that only late parse `LateAttrParseExperimentalExt` attributes
+// (see `LateParsedAttrList::lateAttrParseExperimentalExtOnly()`)
+//
+// * If `-fexperimental-late-parse-attributes`
+//   (`LangOpts.ExperimentalLateParseAttributes`) is enabled the attribute
+//   will be late parsed.
+// * If `-fexperimental-late-parse-attributes`
+//   (`LangOpts.ExperimentalLateParseAttributes`) is disabled the attribute
+//   will **not** be late parsed (i.e parsed immediately).
+//
+// The following contexts are supported:
+//
+// * TODO: Add contexts here when they are implemented.
+//
+def LateAttrParseExperimentalExt : LateAttrParseKind<2>;
+
 class Attr {
   // The various ways in which an attribute can be spelled in source
   list<Spelling> Spellings;
@@ -603,8 +646,8 @@ class Attr {
   list<Accessor> Accessors = [];
   // Specify targets for spellings.
   list<TargetSpecificSpelling> TargetSpecificSpellings = [];
-  // Set to true for attributes with arguments which require delayed parsing.
-  bit LateParsed = 0;
+  // Specifies the late parsing kind.
+  LateAttrParseKind LateParsed = LateAttrParseNever;
   // Set to false to prevent an attribute from being propagated from a template
   // to the instantiation.
   bit Clone = 1;
@@ -3173,7 +3216,7 @@ def DiagnoseIf : InheritableAttr {
               BoolArgument<"ArgDependent", 0, /*fake*/ 1>,
               DeclArgument<Named, "Parent", 0, /*fake*/ 1>];
   let InheritEvenIfAlreadyPresent = 1;
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let AdditionalMembers = [{
     bool isError() const { return diagnosticType == DT_Error; }
     bool isWarning() const { return diagnosticType == DT_Warning; }
@@ -3472,7 +3515,7 @@ def AssertCapability : InheritableAttr {
   let Spellings = [Clang<"assert_capability", 0>,
                    Clang<"assert_shared_capability", 0>];
   let Subjects = SubjectList<[Function]>;
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3488,7 +3531,7 @@ def AcquireCapability : InheritableAttr {
                    GNU<"exclusive_lock_function">,
                    GNU<"shared_lock_function">];
   let Subjects = SubjectList<[Function]>;
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3504,7 +3547,7 @@ def TryAcquireCapability : InheritableAttr {
                    Clang<"try_acquire_shared_capability", 0>];
   let Subjects = SubjectList<[Function],
                              ErrorDiag>;
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3520,7 +3563,7 @@ def ReleaseCapability : InheritableAttr {
                    Clang<"release_generic_capability", 0>,
                    Clang<"unlock_function", 0>];
   let Subjects = SubjectList<[Function]>;
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3539,7 +3582,7 @@ def RequiresCapability : InheritableAttr {
                    Clang<"requires_shared_capability", 0>,
                    Clang<"shared_locks_required", 0>];
   let Args = [VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3559,7 +3602,7 @@ def NoThreadSafetyAnalysis : InheritableAttr {
 def GuardedBy : InheritableAttr {
   let Spellings = [GNU<"guarded_by">];
   let Args = [ExprArgument<"Arg">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3570,7 +3613,7 @@ def GuardedBy : InheritableAttr {
 def PtGuardedBy : InheritableAttr {
   let Spellings = [GNU<"pt_guarded_by">];
   let Args = [ExprArgument<"Arg">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3581,7 +3624,7 @@ def PtGuardedBy : InheritableAttr {
 def AcquiredAfter : InheritableAttr {
   let Spellings = [GNU<"acquired_after">];
   let Args = [VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3592,7 +3635,7 @@ def AcquiredAfter : InheritableAttr {
 def AcquiredBefore : InheritableAttr {
   let Spellings = [GNU<"acquired_before">];
   let Args = [VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3603,7 +3646,7 @@ def AcquiredBefore : InheritableAttr {
 def AssertExclusiveLock : InheritableAttr {
   let Spellings = [GNU<"assert_exclusive_lock">];
   let Args = [VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3614,7 +3657,7 @@ def AssertExclusiveLock : InheritableAttr {
 def AssertSharedLock : InheritableAttr {
   let Spellings = [GNU<"assert_shared_lock">];
   let Args = [VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3627,7 +3670,7 @@ def AssertSharedLock : InheritableAttr {
 def ExclusiveTrylockFunction : InheritableAttr {
   let Spellings = [GNU<"exclusive_trylock_function">];
   let Args = [ExprArgument<"SuccessValue">, VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3640,7 +3683,7 @@ def ExclusiveTrylockFunction : InheritableAttr {
 def SharedTrylockFunction : InheritableAttr {
   let Spellings = [GNU<"shared_trylock_function">];
   let Args = [ExprArgument<"SuccessValue">, VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
@@ -3651,7 +3694,7 @@ def SharedTrylockFunction : InheritableAttr {
 def LockReturned : InheritableAttr {
   let Spellings = [GNU<"lock_returned">];
   let Args = [ExprArgument<"Arg">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let Subjects = SubjectList<[Function]>;
@@ -3661,7 +3704,7 @@ def LockReturned : InheritableAttr {
 def LocksExcluded : InheritableAttr {
   let Spellings = [GNU<"locks_excluded">];
   let Args = [VariadicExprArgument<"Args">];
-  let LateParsed = 1;
+  let LateParsed = LateAttrParseStandard;
   let TemplateDependent = 1;
   let ParseArgumentsAsUnevaluated = 1;
   let InheritEvenIfAlreadyPresent = 1;
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 8ef6700ecdc78e..55c81eab1ec150 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -164,6 +164,7 @@ LANGOPT(ExperimentalLibrary, 1, 0, "enable unstable and experimental library fea
 LANGOPT(PointerAuthIntrinsics, 1, 0, "pointer authentication intrinsics")
 
 LANGOPT(DoubleSquareBracketAttributes, 1, 0, "'[[]]' attributes extension for all language standard modes")
+LANGOPT(ExperimentalLateParseAttributes, 1, 0, "experimental late parsing of attributes")
 
 COMPATIBLE_LANGOPT(RecoveryAST, 1, 1, "Preserve expressions in AST when encountering errors")
 COMPATIBLE_LANGOPT(RecoveryASTType, 1, 1, "Preserve the type in recovery expressions")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 25f479dccc3c80..864da4e1157f7d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1608,6 +1608,13 @@ defm double_square_bracket_attributes : BoolFOption<"double-square-bracket-attri
   LangOpts<"DoubleSquareBracketAttributes">, DefaultTrue, PosFlag<SetTrue>,
  NegFlag<SetFalse>>;
 
+defm experimental_late_parse_attributes : BoolFOption<"experimental-late-parse-attributes",
+  LangOpts<"ExperimentalLateParseAttributes">, DefaultFalse,
+  PosFlag<SetTrue, [], [ClangOption], "Enable">,
+  NegFlag<SetFalse, [], [ClangOption], "Disable">,
+  BothFlags<[], [ClangOption, CC1Option],
+          " experimental late parsing of attributes">>;
+
 defm autolink : BoolFOption<"autolink",
   CodeGenOpts<"Autolink">, DefaultTrue,
   NegFlag<SetFalse, [], [ClangOption, CC1Option],
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index fb117bf04087ee..81aab8c888ab65 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -1398,12 +1398,21 @@ class Parser : public CodeCompletionHandler {
   // A list of late-parsed attributes.  Used by ParseGNUAttributes.
   class LateParsedAttrList: public SmallVector<LateParsedAttribute *, 2> {
   public:
-    LateParsedAttrList(bool PSoon = false) : ParseSoon(PSoon) { }
+    LateParsedAttrList(bool PSoon = false,
+                       bool LateAttrParseExperimentalExtOnly = false)
+        : ParseSoon(PSoon),
+          LateAttrParseExperimentalExtOnly(LateAttrParseExperimentalExtOnly) {}
 
     bool parseSoon() { return ParseSoon; }
+    /// returns true iff the attribute to be parsed should only be late parsed
+    /// if it is annotated with `LateAttrParseExperimentalExt`
+    bool lateAttrParseExperimentalExtOnly() {
+      return LateAttrParseExperimentalExtOnly;
+    }
 
   private:
-    bool ParseSoon;  // Are we planning to parse these shortly after creation?
+    bool ParseSoon; // Are we planning to parse these shortly after creation?
+    bool LateAttrParseExperimentalExtOnly;
   };
 
   /// Contains the lexed tokens of a member function definition
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 250dc078edf242..1988a90a48ae6b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7490,6 +7490,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.addOptInFlag(CmdArgs, options::OPT_fsafe_buffer_usage_suggestions,
                     options::OPT_fno_safe_buffer_usage_suggestions);
 
+  Args.addOptInFlag(CmdArgs, options::OPT_fexperimental_late_parse_attributes,
+                    options::OPT_fno_experimental_late_parse_attributes);
+
   // Setup statistics file output.
   SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
   if (!StatsFile.empty()) {
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index a7846e102a43c7..7431c256d2c135 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -91,13 +91,23 @@ static StringRef normalizeAttrName(StringRef Name) {
   return Name;
 }
 
-/// isAttributeLateParsed - Return true if the attribute has arguments that
-/// require late parsing.
-static bool isAttributeLateParsed(const IdentifierInfo &II) {
+/// returns true iff attribute is annotated with `LateAttrParseExperimentalExt`
+/// in `Attr.td`.
+static bool IsAttributeLateParsedExperimentalExt(const IdentifierInfo &II) {
+#define CLANG_ATTR_LATE_PARSED_EXPERIMENTAL_EXT_LIST
+  return llvm::StringSwitch<bool>(normalizeAttrName(II.getName()))
+#include "clang/Parse/AttrParserStringSwitches.inc"
+      .Default(false);
+#undef CLANG_ATTR_LATE_PARSED_EXPERIMENTAL_EXT_LIST
+}
+
+/// returns true iff attribute is annotated with `LateAttrParseStandard` in
+/// `Attr.td`.
+static bool IsAttributeLateParsedStandard(const IdentifierInfo &II) {
 #define CLANG_ATTR_LATE_PARSED_LIST
-    return llvm::StringSwitch<bool>(normalizeAttrName(II.getName()))
+  return llvm::StringSwitch<bool>(normalizeAttrName(II.getName()))
 #include "clang/Parse/AttrParserStringSwitches.inc"
-        .Default(false);
+      .Default(false);
 #undef CLANG_ATTR_LATE_PARSED_LIST
 }
 
@@ -222,8 +232,26 @@ void Parser::ParseGNUAttributes(ParsedAttributes &Attrs,
         continue;
       }
 
+      bool LateParse = false;
+      if (!LateAttrs)
+        LateParse = false;
+      else if (LateAttrs->lateAttrParseExperimentalExtOnly()) {
+        // The caller requested that this attribute **only** be late
+        // parsed for `LateAttrParseExperimentalExt` attributes. This will
+        // only be late parsed if the experimental language option is enabled.
+        LateParse = getLangOpts().ExperimentalLateParseAttributes &&
+                    IsAttributeLateParsedExperimentalExt(*AttrName);
+      } else {
+        // The caller did not restrict late parsing to only
+        // `LateAttrParseExperimentalExt` attributes so late parse
+        // both `LateAttrParseStandard` and `LateAttrParseExperimentalExt`
+        // attributes.
+        LateParse = IsAttributeLateParsedExperimentalExt(*AttrName) ||
+                    IsAttributeLateParsedStandard(*AttrName);
+      }
+
       // Handle "parameterized" attributes
-      if (!LateAttrs || !isAttributeLateParsed(*AttrName)) {
+      if (!LateParse) {
         ParseGNUAttributeArgs(AttrName, AttrNameLoc, Attrs, &EndLoc, nullptr,
                               SourceLocation(), ParsedAttr::Form::GNU(), D);
         continue;
diff --git a/clang/test/Driver/experimental-late-parse-attributes.c b/clang/test/Driver/experimental-late-parse-attributes.c
new file mode 100644
index 00000000000000..6b54b898afa749
--- /dev/null
+++ b/clang/test/Driver/experimental-late-parse-attributes.c
@@ -0,0 +1,12 @@
+// RUN: %clang %s -c -fexperimental-late-parse-attributes 2>&1 -### | FileCheck %s -check-prefix=CHECK-ON
+// RUN: %clang %s -c -fno-experimental-late-parse-attributes -fexperimental-late-parse-attributes 2>&1 -### | FileCheck %s -check-prefix=CHECK-ON
+
+// CHECK-ON: -cc1
+// CHECK-ON: -fexperimental-late-parse-attributes
+
+// RUN: %clang %s -c 2>&1 -### | FileCheck %s -check-prefix=CHECK-OFF
+// RUN: %clang %s -c -fno-experimental-late-parse-attributes 2>&1 -### | FileCheck %s -check-prefix=CHECK-OFF
+// RUN: %clang %s -c -fexperimental-late-parse-attributes -fno-experimental-late-parse-attributes 2>&1 -### | FileCheck %s -check-prefix=CHECK-OFF
+
+// CHECK-OFF: -cc1
+// CHECK-OFF-NOT: -fexperimental-late-parse-attributes
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 0d1365f09291e0..aafbf1f40949a2 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -1822,28 +1822,101 @@ void WriteSemanticSpellingSwitch(const std::string &VarName,
   OS << "  }\n";
 }
 
+// Note: these values need to match the values used by LateAttrParseKind in
+// `Attr.td`
+enum class LateAttrParseKind { Never = 0, Standard = 1, ExperimentalExt = 2 };
+
+static LateAttrParseKind getLateAttrParseKind(const Record *Attr) {
+  // This function basically does
+  // `Attr->getValueAsDef("LateParsed")->getValueAsInt("Kind")` but does a bunch
+  // of sanity checking to ensure that `LateAttrParseMode` in `Attr.td` is in
+  // sync with the `LateAttrParseKind` enum in this source file.
+
+  static constexpr StringRef LateParsedStr = "LateParsed";
+  static constexpr StringRef LateAttrParseKindStr = "LateAttrParseKind";
+  static constexpr StringRef KindFieldStr = "Kind";
+
+  auto *LAPK = Attr->getValueAsDef(LateParsedStr);
+
+  // Typecheck the `LateParsed` field.
+  SmallVector<Record *, 1> SuperClasses;
+  LAPK->getDirectSuperClasses(SuperClasses);
+  if (SuperClasses.size() != 1)
+    PrintFatalError(Attr, "Field `" + llvm::Twine(LateParsedStr) +
+                              "`should only have one super class");
+
+  if (SuperClasses[0]->getName().compare(LateAttrParseKindStr) != 0)
+    PrintFatalError(Attr, "Field `" + llvm::Twine(LateParsedStr) +
+                              "`should only have type `" +
+                              llvm::Twine(LateAttrParseKindStr) +
+                              "` but found type `" +
+                              SuperClasses[0]->getName() + "`");
+
+  // Get Kind and verify the enum name matches the name in `Attr.td`.
+  unsigned Kind = LAPK->getValueAsInt(KindFieldStr);
+  switch (LateAttrParseKind(Kind)) {
+#define CASE(X)                                                                \
+  case LateAttrParseKind::X:                                                   \
+    if (LAPK->getName().compare("LateAttrParse" #X) != 0) {                    \
+      PrintFatalError(Attr,                                                    \
+                      "Field `" + llvm::Twine(LateParsedStr) + "` set to `" +  \
+                          LAPK->getName() +                                    \
+                          "` but this converts to `LateAttrParseKind::" +      \
+                          llvm::Twine(#X) + "`");                              \
+    }                                                                          \
+    return LateAttrParseKind::X;
+
+    CASE(Never)
+    CASE(Standard)
+    CASE(ExperimentalExt)
+#undef CASE
+  }
+
+  // The Kind value is completely invalid
+  auto KindValueStr = llvm::utostr(Kind);
+  PrintFatalError(Attr, "Field `" + llvm::Twine(LateParsedStr) + "` set to `" +
+                            LAPK->getName() + "` has unexpected `" +
+                            llvm::Twine(KindFieldStr) + "` value of " +
+                            KindValueStr);
+}
+
 // Emits the LateParsed property for attributes.
-static void emitClangAttrLateParsedList(RecordKeeper &Records, raw_ostream &OS) {
-  OS << "#if defined(CLANG_ATTR_LATE_PARSED_LIST)\n";
-  std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
+static void emitClangAttrLateParsedListImpl(RecordKeeper &Records,
+                                            raw_ostream &OS,
+                                            LateAttrParseKind LateParseMode) {
+  std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
 
   for (const auto *Attr : Attrs) {
-    bool LateParsed = Attr->getValueAsBit("LateParsed");
+    if (LateAttrParseKind LateParsed = getLateAttrParseKind(Attr);
+        LateParsed != LateParseMode)
+      continue;
 
-    if (LateParsed) {
-      std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*Attr);
+    std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*Attr);
 
-      // FIXME: Handle non-GNU attributes
-      for (const auto &I : Spellings) {
-        if (I.variety() != "GNU")
-          continue;
-        OS << ".Case(\"" << I.name() << "\", " << LateParsed << ")\n";
-      }
+    // FIXME: Handle non-GNU attributes
+    for (const auto &I : Spellings) {
+      if (I.variety() != "GNU")
+        continue;
+      OS << ".Case(\"" << I.name() << "\", 1)\n";
     }
   }
+}
+
+static void emitClangAttrLateParsedList(RecordKeeper &Records,
+                                        raw_ostream &OS) {
+  OS << "#if defined(CLANG_ATTR_LATE_PARSED_LIST)\n";
+  emitClangAttrLateParsedListImpl(Records, OS, LateAttrParseKind::Standard);
   OS << "#endif // CLANG_ATTR_LATE_PARSED_LIST\n\n";
 }
 
+static void emitClangAttrLateParsedExperimentalList(RecordKeeper &Records,
+                                                    raw_ostream &OS) {
+  OS << "#if defined(CLANG_ATTR_LATE_PARSED_EXPERIMENTAL_EXT_LIST)\n";
+  emitClangAttrLateParsedListImpl(Records, OS,
+                                  LateAttrParseKind::ExperimentalExt);
+  OS << "#endif // CLANG_ATTR_LATE_PARSED_EXPERIMENTAL_EXT_LIST\n\n";
+}
+
 static bool hasGNUorCXX11Spelling(const Record &Attribute) {
   std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(Attribute);
   for (const auto &I : Spellings) {
@@ -2101,9 +2174,21 @@ bool PragmaClangAttributeSupport::isAttributedSupported(
     return SpecifiedResult;
 
   // Opt-out rules:
-  // An attribute requires delayed parsing (LateParsed is on)
-  if (Attribute.getValueAsBit("LateParsed"))
+
+  // An attribute requires delayed parsing (LateParsed is on).
+  switch (getLateAttrParseKind(&Attribute)) {
+  case LateAttrParseKind::Never:
+    break;
+  case LateAttrParseKind::Standard:
+    return false;
+  case LateAttrParseKind::ExperimentalExt:
+    // This is only late parsed in certain parsing contexts when
+    // `LangOpts.ExperimentalLateParseAttributes` is true. Information about the
+    // parsing context and `LangOpts` is not available in this method so just
+    // opt this attribute out.
     return false;
+  }
+
   // An attribute has no GNU/CXX11 spelling
   if (!hasGNUorCXX11Spelling(Attribute))
     return false;
@@ -2885,8 +2970,27 @@ static void emitAttributes(RecordKeeper &Records, raw_ostream &OS,
         return;
       }
       OS << "\n  : " << SuperName << "(Ctx, CommonInfo, ";
-      OS << "attr::" << R.getName() << ", "
-         << (R.getValueAsBit("LateParsed") ? "true" : "false");
+      OS << "attr::" << R.getName() << ", ";
+
+      // Handle different late parsing modes.
+      OS << "/*IsLateParsed=*/";
+      switch (getLateAttrParseKind(&R)) {
+      case LateAttrParseKind::Never:
+        OS << "false";
+        break;
+      case LateAttrParseKind::ExperimentalExt:
+        // Currently no clients need to know the distinction between `Standard`
+        // and `ExperimentalExt` so treat `ExperimentalExt` just like
+        // `Standard` for now.
+      case LateAttrParseKind::Standard:
+        // Note: This is misleading. `IsLateParsed` doesn't mean the
+        // attribute was actually late parsed. Instead it means the attribute in
+        // `Attr.td` is marked as being late parsed. Maybe it should be called
+        // `IsLateParseable`?
+        OS << "true";
+        break;
+      }
+
       if (Inheritable) {
         OS << ", "
            << (R.getValueAsBit("InheritEvenIfAlreadyPresent") ? "true"
@@ -4843,6 +4947,7 @@ void EmitClangAttrParserStringSwitches(RecordKeeper &Records, raw_ostream &OS) {
   emitClangAttrAcceptsExprPack(Records, OS);
   emitClangAttrTypeArgList(Records, OS);
   emitClangAttrLateParsedList(Records, OS);
+  emitClangAttrLateParsedExperimentalList(Records, OS);
 }
 
 void EmitClangAttrSubjectMatchRulesParserStringSwitches(RecordKeeper &Records,

From 6ea0c0a28343b2676baf480db490b5a27fa11d7c Mon Sep 17 00:00:00 2001
From: paperchalice <liujunchang97@outlook.com>
Date: Tue, 30 Apr 2024 09:54:48 +0800
Subject: [PATCH 12/65] [NewPM][CodeGen] Add `MachineFunctionAnalysis` (#88610)

In new pass system, `MachineFunction` could be an analysis result again,
machine module pass can now fetch them from analysis manager.
`MachineModuleInfo` no longer owns them.
Remove `FreeMachineFunctionPass`, replaced by
`InvalidateAnalysisPass<MachineFunctionAnalysis>`.

Now `FreeMachineFunction` is replaced by
`InvalidateAnalysisPass<MachineFunctionAnalysis>`, the workaround in
`MachineFunctionPassManager` is no longer needed, there is no difference
between `unittests/MIR/PassBuilderCallbacksTest.cpp` and
`unittests/IR/PassBuilderCallbacksTest.cpp`.
---
 .../llvm/CodeGen/FreeMachineFunction.h        |  24 -
 .../llvm/CodeGen/MIRParser/MIRParser.h        |  12 +
 .../llvm/CodeGen/MachineFunctionAnalysis.h    |  50 ++
 llvm/include/llvm/CodeGen/MachineModuleInfo.h |   4 +
 .../include/llvm/CodeGen/MachinePassManager.h |  41 +-
 llvm/include/llvm/IR/LLVMContext.h            |   6 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  18 +-
 .../llvm/Passes/MachinePassRegistry.def       |   1 -
 llvm/lib/CodeGen/CMakeLists.txt               |   2 +-
 .../CodeGen/DeadMachineInstructionElim.cpp    |   2 +-
 llvm/lib/CodeGen/FreeMachineFunction.cpp      |  22 -
 llvm/lib/CodeGen/MIRParser/MIRParser.cpp      |  48 +-
 llvm/lib/CodeGen/MachineFunctionAnalysis.cpp  |  46 ++
 llvm/lib/CodeGen/MachinePassManager.cpp       | 103 ++--
 llvm/lib/IR/LLVMContext.cpp                   |   7 +
 llvm/lib/IR/LLVMContextImpl.h                 |   4 +
 llvm/lib/Passes/PassBuilder.cpp               |  22 +-
 llvm/lib/Passes/PassRegistry.def              |   3 +
 llvm/lib/Passes/StandardInstrumentations.cpp  |   8 -
 llvm/test/tools/llc/new-pm/pipeline.ll        |   2 +-
 llvm/test/tools/llc/new-pm/pipeline.mir       |   2 +-
 llvm/test/tools/llc/new-pm/start-stop.ll      |   2 +-
 llvm/tools/llc/NewPMDriver.cpp                |  10 +-
 llvm/unittests/CodeGen/PassManagerTest.cpp    |  14 +-
 llvm/unittests/MIR/CMakeLists.txt             |   1 -
 .../MIR/PassBuilderCallbacksTest.cpp          | 575 ------------------
 26 files changed, 298 insertions(+), 731 deletions(-)
 delete mode 100644 llvm/include/llvm/CodeGen/FreeMachineFunction.h
 create mode 100644 llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
 delete mode 100644 llvm/lib/CodeGen/FreeMachineFunction.cpp
 create mode 100644 llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
 delete mode 100644 llvm/unittests/MIR/PassBuilderCallbacksTest.cpp

diff --git a/llvm/include/llvm/CodeGen/FreeMachineFunction.h b/llvm/include/llvm/CodeGen/FreeMachineFunction.h
deleted file mode 100644
index 5f21c6720350bc..00000000000000
--- a/llvm/include/llvm/CodeGen/FreeMachineFunction.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- llvm/CodeGen/FreeMachineFunction.h -----------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_FREEMACHINEFUNCTION_H
-#define LLVM_CODEGEN_FREEMACHINEFUNCTION_H
-
-#include "llvm/CodeGen/MachinePassManager.h"
-
-namespace llvm {
-
-class FreeMachineFunctionPass : public PassInfoMixin<FreeMachineFunctionPass> {
-public:
-  PreservedAnalyses run(MachineFunction &MF,
-                        MachineFunctionAnalysisManager &MFAM);
-};
-
-} // namespace llvm
-
-#endif // LLVM_CODEGEN_FREEMACHINEFUNCTION_H
diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
index e1606e7c0ea72d..ae0938a48a7110 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -34,6 +34,9 @@ class MachineModuleInfo;
 class SMDiagnostic;
 class StringRef;
 
+template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
+using ModuleAnalysisManager = AnalysisManager<Module>;
+
 typedef llvm::function_ref<std::optional<std::string>(StringRef, StringRef)>
     DataLayoutCallbackTy;
 
@@ -60,6 +63,15 @@ class MIRParser {
   ///
   /// \returns true if an error occurred.
   bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
+
+  /// Parses MachineFunctions in the MIR file and add them as the result
+  /// of MachineFunctionAnalysis in ModulePassManager \p MAM.
+  /// User should register at least MachineFunctionAnalysis,
+  /// MachineModuleAnalysis, FunctionAnalysisManagerModuleProxy and
+  /// PassInstrumentationAnalysis in \p MAM before parsing MIR.
+  ///
+  /// \returns true if an error occurred.
+  bool parseMachineFunctions(Module &M, ModuleAnalysisManager &MAM);
 };
 
 /// This function is the main interface to the MIR serialization format parser.
diff --git a/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
new file mode 100644
index 00000000000000..f54d455292da03
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -0,0 +1,50 @@
+//===- llvm/CodeGen/MachineFunctionAnalysis.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineFunctionAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS
+#define LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class MachineFunction;
+class LLVMTargetMachine;
+
+/// This analysis create MachineFunction for given Function.
+/// To release the MachineFunction, users should invalidate it explicitly.
+class MachineFunctionAnalysis
+    : public AnalysisInfoMixin<MachineFunctionAnalysis> {
+  friend AnalysisInfoMixin<MachineFunctionAnalysis>;
+
+  static AnalysisKey Key;
+
+  const LLVMTargetMachine *TM;
+
+public:
+  class Result {
+    std::unique_ptr<MachineFunction> MF;
+
+  public:
+    Result(std::unique_ptr<MachineFunction> MF) : MF(std::move(MF)) {}
+    MachineFunction &getMF() { return *MF; };
+    bool invalidate(Function &, const PreservedAnalyses &PA,
+                    FunctionAnalysisManager::Invalidator &);
+  };
+
+  MachineFunctionAnalysis(const LLVMTargetMachine *TM) : TM(TM){};
+  Result run(Function &F, FunctionAnalysisManager &FAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MachineFunctionAnalysis
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 5f66d1ada0d082..92ea3c902ce95e 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -147,10 +147,14 @@ class MachineModuleInfo {
 
   /// Returns the MachineFunction constructed for the IR function \p F.
   /// Creates a new MachineFunction if none exists yet.
+  /// NOTE: New pass manager clients shall not use this method to get
+  /// the `MachineFunction`, use `MachineFunctionAnalysis` instead.
   MachineFunction &getOrCreateMachineFunction(Function &F);
 
   /// \brief Returns the MachineFunction associated to IR function \p F if there
   /// is one, otherwise nullptr.
+  /// NOTE: New pass manager clients shall not use this method to get
+  /// the `MachineFunction`, use `MachineFunctionAnalysis` instead.
   MachineFunction *getMachineFunction(const Function &F) const;
 
   /// Delete the MachineFunction \p MF and reset the link in the IR Function to
diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h
index 4f0b6ba2b1e738..852b1a0f416139 100644
--- a/llvm/include/llvm/CodeGen/MachinePassManager.h
+++ b/llvm/include/llvm/CodeGen/MachinePassManager.h
@@ -108,6 +108,15 @@ bool MachineFunctionAnalysisManagerModuleProxy::Result::invalidate(
     ModuleAnalysisManager::Invalidator &Inv);
 extern template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager,
                                                 Module>;
+using MachineFunctionAnalysisManagerFunctionProxy =
+    InnerAnalysisManagerProxy<MachineFunctionAnalysisManager, Function>;
+
+template <>
+bool MachineFunctionAnalysisManagerFunctionProxy::Result::invalidate(
+    Function &F, const PreservedAnalyses &PA,
+    FunctionAnalysisManager::Invalidator &Inv);
+extern template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager,
+                                                Function>;
 
 extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
                                                 MachineFunction>;
@@ -129,16 +138,6 @@ class FunctionAnalysisManagerMachineFunctionProxy
       Arg.FAM = nullptr;
     }
 
-    ~Result() {
-      // FAM is cleared in a moved from state where there is nothing to do.
-      if (!FAM)
-        return;
-
-      // Clear out the analysis manager if we're being destroyed -- it means we
-      // didn't even see an invalidate call when we got invalidated.
-      FAM->clear();
-    }
-
     Result &operator=(Result &&RHS) {
       FAM = RHS.FAM;
       // We have to null out the analysis manager in the moved-from state
@@ -187,18 +186,18 @@ class FunctionAnalysisManagerMachineFunctionProxy
   FunctionAnalysisManager *FAM;
 };
 
-class ModuleToMachineFunctionPassAdaptor
-    : public PassInfoMixin<ModuleToMachineFunctionPassAdaptor> {
+class FunctionToMachineFunctionPassAdaptor
+    : public PassInfoMixin<FunctionToMachineFunctionPassAdaptor> {
 public:
   using PassConceptT =
       detail::PassConcept<MachineFunction, MachineFunctionAnalysisManager>;
 
-  explicit ModuleToMachineFunctionPassAdaptor(
+  explicit FunctionToMachineFunctionPassAdaptor(
       std::unique_ptr<PassConceptT> Pass)
       : Pass(std::move(Pass)) {}
 
-  /// Runs the function pass across every function in the module.
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  /// Runs the function pass across every function in the function.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
   void printPipeline(raw_ostream &OS,
                      function_ref<StringRef(StringRef)> MapClassName2PassName);
 
@@ -209,14 +208,14 @@ class ModuleToMachineFunctionPassAdaptor
 };
 
 template <typename MachineFunctionPassT>
-ModuleToMachineFunctionPassAdaptor
-createModuleToMachineFunctionPassAdaptor(MachineFunctionPassT &&Pass) {
+FunctionToMachineFunctionPassAdaptor
+createFunctionToMachineFunctionPassAdaptor(MachineFunctionPassT &&Pass) {
   using PassModelT = detail::PassModel<MachineFunction, MachineFunctionPassT,
                                        MachineFunctionAnalysisManager>;
   // Do not use make_unique, it causes too many template instantiations,
   // causing terrible compile times.
-  return ModuleToMachineFunctionPassAdaptor(
-      std::unique_ptr<ModuleToMachineFunctionPassAdaptor::PassConceptT>(
+  return FunctionToMachineFunctionPassAdaptor(
+      std::unique_ptr<FunctionToMachineFunctionPassAdaptor::PassConceptT>(
           new PassModelT(std::forward<MachineFunctionPassT>(Pass))));
 }
 
@@ -244,6 +243,10 @@ extern template class PassManager<MachineFunction>;
 /// Convenience typedef for a pass manager over functions.
 using MachineFunctionPassManager = PassManager<MachineFunction>;
 
+/// Returns the minimum set of Analyses that all machine function passes must
+/// preserve.
+PreservedAnalyses getMachineFunctionPassPreservedAnalyses();
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEPASSMANAGER_H
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index e5786afd72214b..89ad6f1572c679 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -155,6 +155,10 @@ class LLVMContext {
   void enableDebugTypeODRUniquing();
   void disableDebugTypeODRUniquing();
 
+  /// generateMachineFunctionNum - Get a unique number for MachineFunction
+  /// that associated with the given Function.
+  unsigned generateMachineFunctionNum(Function &);
+
   /// Defines the type of a yield callback.
   /// \see LLVMContext::setYieldCallback.
   using YieldCallbackTy = void (*)(LLVMContext *Context, void *OpaqueHandle);
@@ -332,7 +336,7 @@ class LLVMContext {
   void addModule(Module*);
 
   /// removeModule - Unregister a module from this context.
-  void removeModule(Module*);
+  void removeModule(Module *);
 };
 
 // Create wrappers for C Binding types (see CBindingWrapping.h).
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 2e94a19502131a..17bea5da48ce14 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -29,7 +29,6 @@
 #include "llvm/CodeGen/DwarfEHPrepare.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandReductions.h"
-#include "llvm/CodeGen/FreeMachineFunction.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GlobalMerge.h"
 #include "llvm/CodeGen/IndirectBrExpand.h"
@@ -38,6 +37,8 @@
 #include "llvm/CodeGen/JMCInstrumenter.h"
 #include "llvm/CodeGen/LowerEmuTLS.h"
 #include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -199,8 +200,13 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
     AddMachinePass(ModulePassManager &MPM, const DerivedT &PB)
         : MPM(MPM), PB(PB) {}
     ~AddMachinePass() {
-      if (!MFPM.isEmpty())
-        MPM.addPass(createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
+      if (!MFPM.isEmpty()) {
+        FunctionPassManager FPM;
+        FPM.addPass(
+            createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+        FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
+        MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      }
     }
 
     template <typename PassT>
@@ -219,8 +225,8 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
       } else {
         // Add Module Pass
         if (!MFPM.isEmpty()) {
-          MPM.addPass(
-              createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
+          MPM.addPass(createModuleToFunctionPassAdaptor(
+              createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
           MFPM = MachineFunctionPassManager();
         }
 
@@ -512,6 +518,7 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::buildPipeline(
 
   {
     AddIRPass addIRPass(MPM, derived());
+    addIRPass(RequireAnalysisPass<MachineModuleAnalysis, Module>());
     addIRPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
     addIRPass(RequireAnalysisPass<CollectorMetadataAnalysis, Module>());
     addISelPasses(addIRPass);
@@ -538,7 +545,6 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::buildPipeline(
   if (PrintMIR)
     addPass(PrintMIRPass(Out), /*Force=*/true);
 
-  addPass(FreeMachineFunctionPass());
   return verifyStartStop(*StartStopInfo);
 }
 
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 2f77ae655d9b22..5a14d619ea076f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -124,7 +124,6 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PI
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
-// MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
 MACHINE_FUNCTION_PASS("print", PrintMIRPass())
 MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 2c24de60edd43e..77bf1b165d0cf7 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -65,8 +65,8 @@ add_llvm_component_library(LLVMCodeGen
   FEntryInserter.cpp
   FinalizeISel.cpp
   FixupStatepointCallerSaved.cpp
-  FreeMachineFunction.cpp
   FuncletLayout.cpp
+  MachineFunctionAnalysis.cpp
   GCMetadata.cpp
   GCMetadataPrinter.cpp
   GCRootLowering.cpp
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index facc01452d2f12..578854cdb4a5dc 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -68,7 +68,7 @@ DeadMachineInstructionElimPass::run(MachineFunction &MF,
                                     MachineFunctionAnalysisManager &) {
   if (!DeadMachineInstructionElimImpl().runImpl(MF))
     return PreservedAnalyses::all();
-  PreservedAnalyses PA;
+  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
   PA.preserveSet<CFGAnalyses>();
   return PA;
 }
diff --git a/llvm/lib/CodeGen/FreeMachineFunction.cpp b/llvm/lib/CodeGen/FreeMachineFunction.cpp
deleted file mode 100644
index f38f3e63a3f37a..00000000000000
--- a/llvm/lib/CodeGen/FreeMachineFunction.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===- FreeMachineFunction.cpp --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/FreeMachineFunction.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-
-using namespace llvm;
-
-PreservedAnalyses
-FreeMachineFunctionPass::run(MachineFunction &MF,
-                             MachineFunctionAnalysisManager &MFAM) {
-  auto &MMI = MF.getMMI();
-  MFAM.invalidate(MF, PreservedAnalyses::none());
-  MMI.deleteMachineFunctionFor(MF.getFunction()); // MF is dangling now.
-  return PreservedAnalyses::none();
-}
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 4d9a8dc5602ba6..b65fc8cf5099b8 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
@@ -97,13 +98,15 @@ class MIRParserImpl {
   /// Create an empty function with the given name.
   Function *createDummyFunction(StringRef Name, Module &M);
 
-  bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
+  bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI,
+                             ModuleAnalysisManager *FAM = nullptr);
 
   /// Parse the machine function in the current YAML document.
   ///
   ///
   /// Return true if an error occurred.
-  bool parseMachineFunction(Module &M, MachineModuleInfo &MMI);
+  bool parseMachineFunction(Module &M, MachineModuleInfo &MMI,
+                            ModuleAnalysisManager *FAM);
 
   /// Initialize the machine function to the state that's described in the MIR
   /// file.
@@ -275,13 +278,14 @@ MIRParserImpl::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
   return M;
 }
 
-bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI,
+                                          ModuleAnalysisManager *MAM) {
   if (NoMIRDocuments)
     return false;
 
   // Parse the machine functions.
   do {
-    if (parseMachineFunction(M, MMI))
+    if (parseMachineFunction(M, MMI, MAM))
       return true;
     In.nextDocument();
   } while (In.setCurrentDocument());
@@ -303,7 +307,8 @@ Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
   return F;
 }
 
-bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
+bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI,
+                                         ModuleAnalysisManager *MAM) {
   // Parse the yaml.
   yaml::MachineFunction YamlMF;
   yaml::EmptyContext Ctx;
@@ -327,14 +332,28 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
                    "' isn't defined in the provided LLVM IR");
     }
   }
-  if (MMI.getMachineFunction(*F) != nullptr)
-    return error(Twine("redefinition of machine function '") + FunctionName +
-                 "'");
 
-  // Create the MachineFunction.
-  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
-  if (initializeMachineFunction(YamlMF, MF))
-    return true;
+  if (!MAM) {
+    if (MMI.getMachineFunction(*F) != nullptr)
+      return error(Twine("redefinition of machine function '") + FunctionName +
+                   "'");
+
+    // Create the MachineFunction.
+    MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+    if (initializeMachineFunction(YamlMF, MF))
+      return true;
+  } else {
+    auto &FAM =
+        MAM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    if (FAM.getCachedResult<MachineFunctionAnalysis>(*F))
+      return error(Twine("redefinition of machine function '") + FunctionName +
+                   "'");
+
+    // Create the MachineFunction.
+    MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(*F).getMF();
+    if (initializeMachineFunction(YamlMF, MF))
+      return true;
+  }
 
   return false;
 }
@@ -1101,6 +1120,11 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
   return Impl->parseMachineFunctions(M, MMI);
 }
 
+bool MIRParser::parseMachineFunctions(Module &M, ModuleAnalysisManager &MAM) {
+  auto &MMI = MAM.getResult<MachineModuleAnalysis>(M).getMMI();
+  return Impl->parseMachineFunctions(M, MMI, &MAM);
+}
+
 std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(
     StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
     std::function<void(Function &)> ProcessIRFunction) {
diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 00000000000000..519a77c44acf84
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,46 @@
+//===- MachineFunctionAnalysis.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis
+// members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+AnalysisKey MachineFunctionAnalysis::Key;
+
+bool MachineFunctionAnalysis::Result::invalidate(
+    Function &, const PreservedAnalyses &PA,
+    FunctionAnalysisManager::Invalidator &) {
+  // Unless it is invalidated explicitly, it should remain preserved.
+  auto PAC = PA.getChecker<MachineFunctionAnalysis>();
+  return !PAC.preservedWhenStateless();
+}
+
+MachineFunctionAnalysis::Result
+MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
+  auto &Context = F.getContext();
+  const TargetSubtargetInfo &STI = *TM->getSubtargetImpl(F);
+  auto &MMI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+                  .getCachedResult<MachineModuleAnalysis>(*F.getParent())
+                  ->getMMI();
+  auto MF = std::make_unique<MachineFunction>(
+      F, *TM, STI, Context.generateMachineFunctionNum(F), MMI);
+  MF->initTargetMachineFunctionInfo(STI);
+
+  // MRI callback for target specific initializations.
+  TM->registerMachineRegisterInfoCallback(*MF);
+
+  return Result(std::move(MF));
+}
diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp
index 2763193b2c306b..6d540808d4cc98 100644
--- a/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -12,21 +12,24 @@
 
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/PassManagerImpl.h"
 
 using namespace llvm;
 
-namespace llvm {
-
 AnalysisKey FunctionAnalysisManagerMachineFunctionProxy::Key;
 
+namespace llvm {
 template class AnalysisManager<MachineFunction>;
 template class PassManager<MachineFunction>;
 template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager,
                                          Module>;
+template class InnerAnalysisManagerProxy<MachineFunctionAnalysisManager,
+                                         Function>;
 template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
                                          MachineFunction>;
+} // namespace llvm
 
 bool FunctionAnalysisManagerMachineFunctionProxy::Result::invalidate(
     MachineFunction &IR, const PreservedAnalyses &PA,
@@ -69,38 +72,65 @@ bool MachineFunctionAnalysisManagerModuleProxy::Result::invalidate(
   return false;
 }
 
+template <>
+bool MachineFunctionAnalysisManagerFunctionProxy::Result::invalidate(
+    Function &F, const PreservedAnalyses &PA,
+    FunctionAnalysisManager::Invalidator &Inv) {
+  // If literally everything is preserved, we're done.
+  if (PA.areAllPreserved())
+    return false; // This is still a valid proxy.
+
+  // If this proxy isn't marked as preserved, then even if the result remains
+  // valid, the key itself may no longer be valid, so we clear everything.
+  //
+  // Note that in order to preserve this proxy, a module pass must ensure that
+  // the MFAM has been completely updated to handle the deletion of functions.
+  // Specifically, any MFAM-cached results for those functions need to have been
+  // forcibly cleared. When preserved, this proxy will only invalidate results
+  // cached on functions *still in the module* at the end of the module pass.
+  auto PAC = PA.getChecker<MachineFunctionAnalysisManagerFunctionProxy>();
+  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) {
+    InnerAM->clear();
+    return true;
+  }
+
+  // FIXME: be more precise, see
+  // FunctionAnalysisManagerModuleProxy::Result::invalidate.
+  if (!PA.allAnalysesInSetPreserved<AllAnalysesOn<MachineFunction>>()) {
+    InnerAM->clear();
+    return true;
+  }
+
+  // Return false to indicate that this result is still a valid proxy.
+  return false;
+}
+
 PreservedAnalyses
-ModuleToMachineFunctionPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
-  auto &MMI = AM.getResult<MachineModuleAnalysis>(M).getMMI();
+FunctionToMachineFunctionPassAdaptor::run(Function &F,
+                                          FunctionAnalysisManager &FAM) {
   MachineFunctionAnalysisManager &MFAM =
-      AM.getResult<MachineFunctionAnalysisManagerModuleProxy>(M).getManager();
-  PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+      FAM.getResult<MachineFunctionAnalysisManagerFunctionProxy>(F)
+          .getManager();
+  PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
   PreservedAnalyses PA = PreservedAnalyses::all();
-  for (Function &F : M) {
-    // Do not codegen any 'available_externally' functions at all, they have
-    // definitions outside the translation unit.
-    if (F.isDeclaration() || F.hasAvailableExternallyLinkage())
-      continue;
+  // Do not codegen any 'available_externally' functions at all, they have
+  // definitions outside the translation unit.
+  if (F.isDeclaration() || F.hasAvailableExternallyLinkage())
+    return PreservedAnalyses::all();
 
-    MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
+  MachineFunction &MF = FAM.getResult<MachineFunctionAnalysis>(F).getMF();
 
-    if (!PI.runBeforePass<MachineFunction>(*Pass, MF))
-      continue;
-    PreservedAnalyses PassPA = Pass->run(MF, MFAM);
-    if (MMI.getMachineFunction(F)) {
-      MFAM.invalidate(MF, PassPA);
-      PI.runAfterPass(*Pass, MF, PassPA);
-    } else {
-      MFAM.clear(MF, F.getName());
-      PI.runAfterPassInvalidated<MachineFunction>(*Pass, PassPA);
-    }
-    PA.intersect(std::move(PassPA));
-  }
+  if (!PI.runBeforePass<MachineFunction>(*Pass, MF))
+    return PreservedAnalyses::all();
+  PreservedAnalyses PassPA = Pass->run(MF, MFAM);
+  MFAM.invalidate(MF, PassPA);
+  PI.runAfterPass(*Pass, MF, PassPA);
+  PA.intersect(std::move(PassPA));
 
   return PA;
 }
 
-void ModuleToMachineFunctionPassAdaptor::printPipeline(
+void FunctionToMachineFunctionPassAdaptor::printPipeline(
     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
   OS << "machine-function(";
   Pass->printPipeline(OS, MapClassName2PassName);
@@ -112,27 +142,24 @@ PreservedAnalyses
 PassManager<MachineFunction>::run(MachineFunction &MF,
                                   AnalysisManager<MachineFunction> &MFAM) {
   PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF);
-  Function &F = MF.getFunction();
-  MachineModuleInfo &MMI =
-      MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
-          .getCachedResult<MachineModuleAnalysis>(*F.getParent())
-          ->getMMI();
   PreservedAnalyses PA = PreservedAnalyses::all();
   for (auto &Pass : Passes) {
     if (!PI.runBeforePass<MachineFunction>(*Pass, MF))
       continue;
 
     PreservedAnalyses PassPA = Pass->run(MF, MFAM);
-    if (MMI.getMachineFunction(F)) {
-      MFAM.invalidate(MF, PassPA);
-      PI.runAfterPass(*Pass, MF, PassPA);
-    } else {
-      MFAM.clear(MF, F.getName());
-      PI.runAfterPassInvalidated<MachineFunction>(*Pass, PassPA);
-    }
+    MFAM.invalidate(MF, PassPA);
+    PI.runAfterPass(*Pass, MF, PassPA);
     PA.intersect(std::move(PassPA));
   }
   return PA;
 }
 
-} // namespace llvm
+PreservedAnalyses llvm::getMachineFunctionPassPreservedAnalyses() {
+  PreservedAnalyses PA;
+  // Machine function passes are not allowed to modify the LLVM
+  // representation, therefore we should preserve all IR analyses.
+  PA.template preserveSet<AllAnalysesOn<Module>>();
+  PA.template preserveSet<AllAnalysesOn<Function>>();
+  return PA;
+}
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index 57077e786efc26..8120cccace40b5 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -118,6 +118,13 @@ void LLVMContext::addModule(Module *M) {
 
 void LLVMContext::removeModule(Module *M) {
   pImpl->OwnedModules.erase(M);
+  pImpl->MachineFunctionNums.erase(M);
+}
+
+unsigned LLVMContext::generateMachineFunctionNum(Function &F) {
+  Module *M = F.getParent();
+  assert(pImpl->OwnedModules.contains(M) && "Unexpected module!");
+  return pImpl->MachineFunctionNums[M]++;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 7c67e191348eaf..2713015c266c7e 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1450,6 +1450,10 @@ class LLVMContextImpl {
   /// will be automatically deleted if this context is deleted.
   SmallPtrSet<Module *, 4> OwnedModules;
 
+  /// MachineFunctionNums - Keep the next available unique number available for
+  /// a MachineFunction in given module. Module must in OwnedModules.
+  DenseMap<Module *, unsigned> MachineFunctionNums;
+
   /// The main remark streamer used by all the other streamers (e.g. IR, MIR,
   /// frontends, etc.). This should only be used by the specific streamers, and
   /// never directly.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8d408ca2363a98..30d3e7a1ec05b8 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -82,7 +82,6 @@
 #include "llvm/CodeGen/ExpandLargeDivRem.h"
 #include "llvm/CodeGen/ExpandLargeFpConvert.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
-#include "llvm/CodeGen/FreeMachineFunction.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GlobalMerge.h"
 #include "llvm/CodeGen/HardwareLoops.h"
@@ -92,6 +91,7 @@
 #include "llvm/CodeGen/JMCInstrumenter.h"
 #include "llvm/CodeGen/LowerEmuTLS.h"
 #include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/SafeStack.h"
 #include "llvm/CodeGen/SelectOptimize.h"
@@ -1230,7 +1230,7 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
   StringRef NameNoBracket = Name.take_until([](char C) { return C == '<'; });
   if (NameNoBracket == "function")
     return true;
-  if (Name == "loop" || Name == "loop-mssa")
+  if (Name == "loop" || Name == "loop-mssa" || Name == "machine-function")
     return true;
 
   // Explicitly handle custom-parsed pass names.
@@ -1408,13 +1408,6 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
       MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
       return Error::success();
     }
-    if (Name == "machine-function") {
-      MachineFunctionPassManager MFPM;
-      if (auto Err = parseMachinePassPipeline(MFPM, InnerPipeline))
-        return Err;
-      MPM.addPass(createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
-      return Error::success();
-    }
     if (auto Params = parseFunctionPipelineName(Name)) {
       if (Params->second)
         return make_error<StringError>(
@@ -1732,6 +1725,13 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
       FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
       return Error::success();
     }
+    if (Name == "machine-function") {
+      MachineFunctionPassManager MFPM;
+      if (auto Err = parseMachinePassPipeline(MFPM, InnerPipeline))
+        return Err;
+      FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+      return Error::success();
+    }
 
     for (auto &C : FunctionPipelineParsingCallbacks)
       if (C(Name, FPM, InnerPipeline))
@@ -1975,6 +1975,8 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
   if (MFAM) {
     MAM.registerPass(
         [&] { return MachineFunctionAnalysisManagerModuleProxy(*MFAM); });
+    FAM.registerPass(
+        [&] { return MachineFunctionAnalysisManagerFunctionProxy(*MFAM); });
     MFAM->registerPass(
         [&] { return ModuleAnalysisManagerMachineFunctionProxy(MAM); });
     MFAM->registerPass(
@@ -2023,7 +2025,7 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
                                  std::move(*Pipeline)}}}};
     } else if (isMachineFunctionPassName(
                    FirstName, MachineFunctionPipelineParsingCallbacks)) {
-      Pipeline = {{"machine-function", std::move(*Pipeline)}};
+      Pipeline = {{"function", {{"machine-function", std::move(*Pipeline)}}}};
     } else {
       for (auto &C : TopLevelPipelineParsingCallbacks)
         if (C(MPM, *Pipeline))
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 2fbc7f7d88ba39..9b670e4e3a44bb 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -254,6 +254,9 @@ FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis())
 FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
 FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
 FUNCTION_ANALYSIS("func-properties", FunctionPropertiesAnalysis())
+FUNCTION_ANALYSIS(
+    "machine-function-info",
+    MachineFunctionAnalysis(static_cast<const LLVMTargetMachine *>(TM)))
 FUNCTION_ANALYSIS("gc-function", GCFunctionAnalysis())
 FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
 FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index c18b462258623d..63490c83e85f05 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -297,14 +297,6 @@ void unwrapAndPrint(raw_ostream &OS, Any IR) {
     auto *M = unwrapModule(IR);
     assert(M && "should have unwrapped module");
     printIR(OS, M);
-
-    if (const auto *MF = unwrapIR<MachineFunction>(IR)) {
-      auto &MMI = MF->getMMI();
-      for (const auto &F : *M) {
-        if (auto *MF = MMI.getMachineFunction(F))
-          MF->print(OS);
-      }
-    }
     return;
   }
 
diff --git a/llvm/test/tools/llc/new-pm/pipeline.ll b/llvm/test/tools/llc/new-pm/pipeline.ll
index 1ace5963e4ef8c..d1a50642ea311b 100644
--- a/llvm/test/tools/llc/new-pm/pipeline.ll
+++ b/llvm/test/tools/llc/new-pm/pipeline.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -filetype=null %s | FileCheck %s
 
 ; CHECK: require<profile-summary>,require<collector-metadata>
-; CHECK: MachineSanitizerBinaryMetadata,FreeMachineFunctionPass
+; CHECK: MachineSanitizerBinaryMetadata
 
diff --git a/llvm/test/tools/llc/new-pm/pipeline.mir b/llvm/test/tools/llc/new-pm/pipeline.mir
index fcc7d4f8f02e38..6baa710060f055 100644
--- a/llvm/test/tools/llc/new-pm/pipeline.mir
+++ b/llvm/test/tools/llc/new-pm/pipeline.mir
@@ -1,6 +1,6 @@
 # RUN: llc -mtriple=x86_64-pc-linux-gnu -x mir -passes=no-op-machine-function --print-pipeline-passes -filetype=null < %s | FileCheck %s --match-full-lines
 
-# CHECK: machine-function(no-op-machine-function),PrintMIRPreparePass,machine-function(print,FreeMachineFunctionPass)
+# CHECK: function(machine-function(no-op-machine-function)),PrintMIRPreparePass,function(machine-function(print))
 
 ---
 name: f
diff --git a/llvm/test/tools/llc/new-pm/start-stop.ll b/llvm/test/tools/llc/new-pm/start-stop.ll
index 8c795a7a70f810..ba225d227d4ca8 100644
--- a/llvm/test/tools/llc/new-pm/start-stop.ll
+++ b/llvm/test/tools/llc/new-pm/start-stop.ll
@@ -2,4 +2,4 @@
 ; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -o /dev/null %s | FileCheck --match-full-lines %s --check-prefix=OBJ
 
 ; NULL: function(mergeicmps,expand-memcmp,gc-lowering)
-; OBJ: function(mergeicmps,expand-memcmp,gc-lowering),PrintMIRPreparePass,machine-function(print)
+; OBJ: function(mergeicmps,expand-memcmp,gc-lowering),PrintMIRPreparePass,function(machine-function(print),invalidate<machine-function-info>)
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 6ae1b8db5e115c..6d9956ea07d356 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/CommandFlags.h"
-#include "llvm/CodeGen/FreeMachineFunction.h"
 #include "llvm/CodeGen/MIRParser/MIRParser.h"
 #include "llvm/CodeGen/MIRPrinter.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -120,11 +119,11 @@ int llvm::compileModuleWithNewPM(
   SI.registerCallbacks(PIC);
   registerCodeGenCallback(PIC, LLVMTM);
 
+  MachineFunctionAnalysisManager MFAM;
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
   CGSCCAnalysisManager CGAM;
   ModuleAnalysisManager MAM;
-  MachineFunctionAnalysisManager MFAM;
   PassBuilder PB(Target.get(), PipelineTuningOptions(), std::nullopt, &PIC);
   PB.registerModuleAnalyses(MAM);
   PB.registerCGSCCAnalyses(CGAM);
@@ -137,6 +136,7 @@ int llvm::compileModuleWithNewPM(
   MAM.registerPass([&] { return MachineModuleAnalysis(MMI); });
 
   ModulePassManager MPM;
+  FunctionPassManager FPM;
 
   if (!PassPipeline.empty()) {
     // Construct a custom pass pipeline that starts after instruction
@@ -152,10 +152,10 @@ int llvm::compileModuleWithNewPM(
     MPM.addPass(PrintMIRPreparePass(*OS));
     MachineFunctionPassManager MFPM;
     MFPM.addPass(PrintMIRPass(*OS));
-    MFPM.addPass(FreeMachineFunctionPass());
-    MPM.addPass(createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
+    FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
-    if (MIR->parseMachineFunctions(*M, MMI))
+    if (MIR->parseMachineFunctions(*M, MAM))
       return 1;
   } else {
     ExitOnErr(LLVMTM.buildCodeGenPipeline(
diff --git a/llvm/unittests/CodeGen/PassManagerTest.cpp b/llvm/unittests/CodeGen/PassManagerTest.cpp
index 71f8832d436564..d3a410f5450cc6 100644
--- a/llvm/unittests/CodeGen/PassManagerTest.cpp
+++ b/llvm/unittests/CodeGen/PassManagerTest.cpp
@@ -184,8 +184,8 @@ TEST_F(PassManagerTest, Basic) {
 
   MachineModuleInfo MMI(LLVMTM);
 
-  LoopAnalysisManager LAM;
   MachineFunctionAnalysisManager MFAM;
+  LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
   CGSCCAnalysisManager CGAM;
   ModuleAnalysisManager MAM;
@@ -205,13 +205,17 @@ TEST_F(PassManagerTest, Basic) {
   std::vector<int> Counts;
 
   ModulePassManager MPM;
+  FunctionPassManager FPM;
   MachineFunctionPassManager MFPM;
   MPM.addPass(TestMachineModulePass(Count, Counts));
-  MPM.addPass(createModuleToMachineFunctionPassAdaptor(
+  FPM.addPass(createFunctionToMachineFunctionPassAdaptor(
       TestMachineFunctionPass(Count, Counts)));
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
   MPM.addPass(TestMachineModulePass(Count, Counts));
   MFPM.addPass(TestMachineFunctionPass(Count, Counts));
-  MPM.addPass(createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
+  FPM = FunctionPassManager();
+  FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
 
   testing::internal::CaptureStderr();
   MPM.run(*M, MAM);
@@ -248,8 +252,10 @@ TEST_F(PassManagerTest, DiagnosticHandler) {
   ModulePassManager MPM;
   FunctionPassManager FPM;
   MachineFunctionPassManager MFPM;
+  MPM.addPass(RequireAnalysisPass<MachineModuleAnalysis, Module>());
   MFPM.addPass(ReportWarningPass());
-  MPM.addPass(createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
+  FPM.addPass(createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
   testing::internal::CaptureStderr();
   MPM.run(*M, MAM);
   std::string Output = testing::internal::GetCapturedStderr();
diff --git a/llvm/unittests/MIR/CMakeLists.txt b/llvm/unittests/MIR/CMakeLists.txt
index 0ad52134a34da2..d6aff46eff07b7 100644
--- a/llvm/unittests/MIR/CMakeLists.txt
+++ b/llvm/unittests/MIR/CMakeLists.txt
@@ -15,7 +15,6 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(MIRTests
   MachineMetadata.cpp
-  PassBuilderCallbacksTest.cpp
   )
 
 target_link_libraries(MIRTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp b/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp
deleted file mode 100644
index 6fd4e54a929f40..00000000000000
--- a/llvm/unittests/MIR/PassBuilderCallbacksTest.cpp
+++ /dev/null
@@ -1,575 +0,0 @@
-//===- unittests/MIR/PassBuilderCallbacksTest.cpp - PB Callback Tests -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/CGSCCPassManager.h"
-#include "llvm/Analysis/LoopAnalysisManager.h"
-#include "llvm/CodeGen/FreeMachineFunction.h"
-#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Testing/Support/Error.h"
-#include <functional>
-#include <gmock/gmock.h>
-#include <gtest/gtest.h>
-#include <llvm/ADT/Any.h>
-#include <llvm/AsmParser/Parser.h>
-#include <llvm/CodeGen/MIRParser/MIRParser.h>
-#include <llvm/CodeGen/MachineFunction.h>
-#include <llvm/CodeGen/MachineModuleInfo.h>
-#include <llvm/CodeGen/MachinePassManager.h>
-#include <llvm/IR/LLVMContext.h>
-#include <llvm/IR/PassInstrumentation.h>
-#include <llvm/IR/PassManager.h>
-#include <llvm/Passes/PassBuilder.h>
-#include <llvm/Support/Regex.h>
-#include <llvm/Support/SourceMgr.h>
-#include <llvm/Support/TargetSelect.h>
-
-using namespace llvm;
-
-namespace {
-using testing::_;
-using testing::AnyNumber;
-using testing::DoAll;
-using testing::Not;
-using testing::Return;
-using testing::WithArgs;
-
-StringRef MIRString = R"MIR(
---- |
-  define void @test() {
-    ret void
-  }
-...
----
-name:            test
-body:             |
-  bb.0 (%ir-block.0):
-    RET64
-...
-)MIR";
-
-/// Helper for HasName matcher that returns getName both for IRUnit and
-/// for IRUnit pointer wrapper into llvm::Any (wrapped by PassInstrumentation).
-template <typename IRUnitT> std::string getName(const IRUnitT &IR) {
-  return std::string(IR.getName());
-}
-
-template <> std::string getName(const StringRef &name) {
-  return std::string(name);
-}
-
-template <> std::string getName(const Any &WrappedIR) {
-  if (const auto *const *M = llvm::any_cast<const Module *>(&WrappedIR))
-    return (*M)->getName().str();
-  if (const auto *const *F = llvm::any_cast<const Function *>(&WrappedIR))
-    return (*F)->getName().str();
-  if (const auto *const *MF =
-          llvm::any_cast<const MachineFunction *>(&WrappedIR))
-    return (*MF)->getName().str();
-  return "<UNKNOWN>";
-}
-/// Define a custom matcher for objects which support a 'getName' method.
-///
-/// LLVM often has IR objects or analysis objects which expose a name
-/// and in tests it is convenient to match these by name for readability.
-/// Usually, this name is either a StringRef or a plain std::string. This
-/// matcher supports any type exposing a getName() method of this form whose
-/// return value is compatible with an std::ostream. For StringRef, this uses
-/// the shift operator defined above.
-///
-/// It should be used as:
-///
-///   HasName("my_function")
-///
-/// No namespace or other qualification is required.
-MATCHER_P(HasName, Name, "") {
-  *result_listener << "has name '" << getName(arg) << "'";
-  return Name == getName(arg);
-}
-
-MATCHER_P(HasNameRegex, Name, "") {
-  *result_listener << "has name '" << getName(arg) << "'";
-  llvm::Regex r(Name);
-  return r.match(getName(arg));
-}
-
-struct MockPassInstrumentationCallbacks {
-  MockPassInstrumentationCallbacks() {
-    ON_CALL(*this, runBeforePass(_, _)).WillByDefault(Return(true));
-  }
-  MOCK_METHOD2(runBeforePass, bool(StringRef PassID, llvm::Any));
-  MOCK_METHOD2(runBeforeSkippedPass, void(StringRef PassID, llvm::Any));
-  MOCK_METHOD2(runBeforeNonSkippedPass, void(StringRef PassID, llvm::Any));
-  MOCK_METHOD3(runAfterPass,
-               void(StringRef PassID, llvm::Any, const PreservedAnalyses &PA));
-  MOCK_METHOD2(runAfterPassInvalidated,
-               void(StringRef PassID, const PreservedAnalyses &PA));
-  MOCK_METHOD2(runBeforeAnalysis, void(StringRef PassID, llvm::Any));
-  MOCK_METHOD2(runAfterAnalysis, void(StringRef PassID, llvm::Any));
-
-  void registerPassInstrumentation(PassInstrumentationCallbacks &Callbacks) {
-    Callbacks.registerShouldRunOptionalPassCallback(
-        [this](StringRef P, llvm::Any IR) {
-          return this->runBeforePass(P, IR);
-        });
-    Callbacks.registerBeforeSkippedPassCallback(
-        [this](StringRef P, llvm::Any IR) {
-          this->runBeforeSkippedPass(P, IR);
-        });
-    Callbacks.registerBeforeNonSkippedPassCallback(
-        [this](StringRef P, llvm::Any IR) {
-          this->runBeforeNonSkippedPass(P, IR);
-        });
-    Callbacks.registerAfterPassCallback(
-        [this](StringRef P, llvm::Any IR, const PreservedAnalyses &PA) {
-          this->runAfterPass(P, IR, PA);
-        });
-    Callbacks.registerAfterPassInvalidatedCallback(
-        [this](StringRef P, const PreservedAnalyses &PA) {
-          this->runAfterPassInvalidated(P, PA);
-        });
-    Callbacks.registerBeforeAnalysisCallback([this](StringRef P, llvm::Any IR) {
-      return this->runBeforeAnalysis(P, IR);
-    });
-    Callbacks.registerAfterAnalysisCallback(
-        [this](StringRef P, llvm::Any IR) { this->runAfterAnalysis(P, IR); });
-  }
-
-  void ignoreNonMockPassInstrumentation(StringRef IRName) {
-    // Generic EXPECT_CALLs are needed to match instrumentation on unimportant
-    // parts of a pipeline that we do not care about (e.g. various passes added
-    // by default by PassBuilder - Verifier pass etc).
-    // Make sure to avoid ignoring Mock passes/analysis, we definitely want
-    // to check these explicitly.
-    EXPECT_CALL(*this,
-                runBeforePass(Not(HasNameRegex("Mock")), HasName(IRName)))
-        .Times(AnyNumber())
-        .WillRepeatedly(Return(false));
-    EXPECT_CALL(
-        *this, runBeforeSkippedPass(Not(HasNameRegex("Mock")), HasName(IRName)))
-        .Times(AnyNumber());
-    EXPECT_CALL(*this, runBeforeNonSkippedPass(Not(HasNameRegex("Mock")),
-                                               HasName(IRName)))
-        .Times(AnyNumber());
-    EXPECT_CALL(*this,
-                runAfterPass(Not(HasNameRegex("Mock")), HasName(IRName), _))
-        .Times(AnyNumber());
-    EXPECT_CALL(*this,
-                runBeforeAnalysis(Not(HasNameRegex("Mock")), HasName(IRName)))
-        .Times(AnyNumber());
-    EXPECT_CALL(*this,
-                runAfterAnalysis(Not(HasNameRegex("Mock")), HasName(IRName)))
-        .Times(AnyNumber());
-  }
-};
-
-template <typename DerivedT> class MockAnalysisHandleBase {
-public:
-  class Analysis : public AnalysisInfoMixin<Analysis> {
-    friend AnalysisInfoMixin<Analysis>;
-    friend MockAnalysisHandleBase;
-    static AnalysisKey Key;
-
-    DerivedT *Handle;
-
-    Analysis(DerivedT &Handle) : Handle(&Handle) {
-      static_assert(std::is_base_of<MockAnalysisHandleBase, DerivedT>::value,
-                    "Must pass the derived type to this template!");
-    }
-
-  public:
-    class Result {
-      friend MockAnalysisHandleBase;
-
-      DerivedT *Handle;
-
-      Result(DerivedT &Handle) : Handle(&Handle) {}
-
-    public:
-      // Forward invalidation events to the mock handle.
-      bool invalidate(MachineFunction &IR, const PreservedAnalyses &PA,
-                      MachineFunctionAnalysisManager::Invalidator &Inv) {
-        return Handle->invalidate(IR, PA, Inv);
-      }
-    };
-
-    Result run(MachineFunction &IR, MachineFunctionAnalysisManager &AM) {
-      return Handle->run(IR, AM);
-    }
-  };
-
-  Analysis getAnalysis() { return Analysis(static_cast<DerivedT &>(*this)); }
-  typename Analysis::Result getResult() {
-    return typename Analysis::Result(static_cast<DerivedT &>(*this));
-  }
-  static StringRef getName() { return llvm::getTypeName<DerivedT>(); }
-
-protected:
-  // FIXME: MSVC seems unable to handle a lambda argument to Invoke from within
-  // the template, so we use a boring static function.
-  static bool
-  invalidateCallback(MachineFunction &IR, const PreservedAnalyses &PA,
-                     MachineFunctionAnalysisManager::Invalidator &Inv) {
-    auto PAC = PA.template getChecker<Analysis>();
-    return !PAC.preserved() &&
-           !PAC.template preservedSet<AllAnalysesOn<MachineFunction>>();
-  }
-
-  /// Derived classes should call this in their constructor to set up default
-  /// mock actions. (We can't do this in our constructor because this has to
-  /// run after the DerivedT is constructed.)
-  void setDefaults() {
-    ON_CALL(static_cast<DerivedT &>(*this), run(_, _))
-        .WillByDefault(Return(this->getResult()));
-    ON_CALL(static_cast<DerivedT &>(*this), invalidate(_, _, _))
-        .WillByDefault(&invalidateCallback);
-  }
-};
-
-template <typename DerivedT> class MockPassHandleBase {
-public:
-  class Pass : public PassInfoMixin<Pass> {
-    friend MockPassHandleBase;
-
-    DerivedT *Handle;
-
-    Pass(DerivedT &Handle) : Handle(&Handle) {
-      static_assert(std::is_base_of<MockPassHandleBase, DerivedT>::value,
-                    "Must pass the derived type to this template!");
-    }
-
-  public:
-    PreservedAnalyses run(MachineFunction &IR,
-                          MachineFunctionAnalysisManager &AM) {
-      return Handle->run(IR, AM);
-    }
-  };
-
-  static StringRef getName() { return llvm::getTypeName<DerivedT>(); }
-
-  Pass getPass() { return Pass(static_cast<DerivedT &>(*this)); }
-
-protected:
-  /// Derived classes should call this in their constructor to set up default
-  /// mock actions. (We can't do this in our constructor because this has to
-  /// run after the DerivedT is constructed.)
-  void setDefaults() {
-    ON_CALL(static_cast<DerivedT &>(*this), run(_, _))
-        .WillByDefault(Return(PreservedAnalyses::all()));
-  }
-};
-
-struct MockAnalysisHandle : public MockAnalysisHandleBase<MockAnalysisHandle> {
-  MOCK_METHOD2(run, Analysis::Result(MachineFunction &,
-                                     MachineFunctionAnalysisManager &));
-
-  MOCK_METHOD3(invalidate, bool(MachineFunction &, const PreservedAnalyses &,
-                                MachineFunctionAnalysisManager::Invalidator &));
-
-  MockAnalysisHandle() { setDefaults(); }
-};
-
-template <typename DerivedT>
-AnalysisKey MockAnalysisHandleBase<DerivedT>::Analysis::Key;
-
-class MockPassHandle : public MockPassHandleBase<MockPassHandle> {
-public:
-  MOCK_METHOD2(run, PreservedAnalyses(MachineFunction &,
-                                      MachineFunctionAnalysisManager &));
-
-  MockPassHandle() { setDefaults(); }
-};
-
-class MachineFunctionCallbacksTest : public testing::Test {
-protected:
-  static void SetUpTestCase() {
-    InitializeAllTargetInfos();
-    InitializeAllTargets();
-    InitializeAllTargetMCs();
-  }
-
-  LLVMContext Context;
-
-  std::unique_ptr<LLVMTargetMachine> TM;
-  std::unique_ptr<MachineModuleInfo> MMI;
-
-  std::unique_ptr<Module> M;
-
-  PassInstrumentationCallbacks PIC;
-  std::unique_ptr<PassBuilder> PB;
-  ModulePassManager MPM;
-  MachineFunctionAnalysisManager MFAM;
-  LoopAnalysisManager LAM;
-  FunctionAnalysisManager FAM;
-  CGSCCAnalysisManager CGAM;
-  ModuleAnalysisManager MAM;
-
-  MockPassInstrumentationCallbacks CallbacksHandle;
-  MockPassHandle PassHandle;
-  MockAnalysisHandle AnalysisHandle;
-
-  static std::unique_ptr<Module> parseMIR(StringRef MIRCode,
-                                          LLVMContext &Context,
-                                          TargetMachine &TM,
-                                          MachineModuleInfo &MMI) {
-    SMDiagnostic Diagnostic;
-    std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode);
-    std::unique_ptr<MIRParser> MIR =
-        createMIRParser(std::move(MBuffer), Context);
-    assert(MIR);
-
-    std::unique_ptr<Module> Mod = MIR->parseIRModule();
-    assert(Mod);
-
-    // Module identifier is used in tests below.
-    Mod->setModuleIdentifier("module");
-    Mod->setDataLayout(TM.createDataLayout());
-
-    [[maybe_unused]] bool Ret = MIR->parseMachineFunctions(*Mod, MMI);
-    assert(!Ret);
-
-    return Mod;
-  }
-
-  static PreservedAnalyses
-  getAnalysisResult(MachineFunction &U, MachineFunctionAnalysisManager &MFAM) {
-    MFAM.getResult<MockAnalysisHandle::Analysis>(U);
-    return PreservedAnalyses::all();
-  }
-
-  void SetUp() override {
-    std::string Error;
-    auto TripleName = "x86_64-pc-linux-gnu";
-    auto *T = TargetRegistry::lookupTarget(TripleName, Error);
-    if (!T)
-      GTEST_SKIP();
-    TM = std::unique_ptr<LLVMTargetMachine>(
-        static_cast<LLVMTargetMachine *>(T->createTargetMachine(
-            TripleName, "", "", TargetOptions(), std::nullopt)));
-    if (!TM)
-      GTEST_SKIP();
-
-    MMI = std::make_unique<MachineModuleInfo>(TM.get());
-    M = parseMIR(MIRString, Context, *TM, *MMI);
-    PB = std::make_unique<PassBuilder>(TM.get(), PipelineTuningOptions(),
-                                       std::nullopt, &PIC);
-
-    /// Register a callback for analysis registration.
-    ///
-    /// The callback is a function taking a reference to an AnalyisManager
-    /// object. When called, the callee gets to register its own analyses with
-    /// this PassBuilder instance.
-    PB->registerAnalysisRegistrationCallback(
-        [this](MachineFunctionAnalysisManager &AM) {
-          // Register our mock analysis
-          AM.registerPass([this] { return AnalysisHandle.getAnalysis(); });
-        });
-
-    /// Register a callback for pipeline parsing.
-    ///
-    /// During parsing of a textual pipeline, the PassBuilder will call these
-    /// callbacks for each encountered pass name that it does not know. This
-    /// includes both simple pass names as well as names of sub-pipelines. In
-    /// the latter case, the InnerPipeline is not empty.
-    PB->registerPipelineParsingCallback(
-        [this](StringRef Name, MachineFunctionPassManager &PM,
-               ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
-          if (parseAnalysisUtilityPasses<MockAnalysisHandle::Analysis>(
-                  "test-analysis", Name, PM))
-            return true;
-
-          /// Parse the name of our pass mock handle
-          if (Name == "test-transform") {
-            PM.addPass(PassHandle.getPass());
-            return true;
-          }
-          return false;
-        });
-
-    /// Register builtin analyses and cross-register the analysis proxies
-    PB->registerModuleAnalyses(MAM);
-    PB->registerCGSCCAnalyses(CGAM);
-    PB->registerFunctionAnalyses(FAM);
-    PB->registerLoopAnalyses(LAM);
-    PB->registerMachineFunctionAnalyses(MFAM);
-    PB->crossRegisterProxies(LAM, FAM, CGAM, MAM, &MFAM);
-    MAM.registerPass([&] { return MachineModuleAnalysis(*MMI); });
-  }
-};
-
-TEST_F(MachineFunctionCallbacksTest, Passes) {
-  EXPECT_CALL(AnalysisHandle, run(HasName("test"), _));
-  EXPECT_CALL(PassHandle, run(HasName("test"), _)).WillOnce(&getAnalysisResult);
-
-  StringRef PipelineText = "test-transform";
-  ASSERT_THAT_ERROR(PB->parsePassPipeline(MPM, PipelineText), Succeeded())
-      << "Pipeline was: " << PipelineText;
-  MPM.run(*M, MAM);
-}
-
-TEST_F(MachineFunctionCallbacksTest, InstrumentedPasses) {
-  CallbacksHandle.registerPassInstrumentation(PIC);
-  // Non-mock instrumentation not specifically mentioned below can be ignored.
-  CallbacksHandle.ignoreNonMockPassInstrumentation("test");
-  CallbacksHandle.ignoreNonMockPassInstrumentation("module");
-
-  // PassInstrumentation calls should happen in-sequence, in the same order
-  // as passes/analyses are scheduled.
-  ::testing::Sequence PISequence;
-  EXPECT_CALL(CallbacksHandle,
-              runBeforePass(HasNameRegex("MockPassHandle"), HasName("test")))
-      .InSequence(PISequence)
-      .WillOnce(Return(true));
-  EXPECT_CALL(
-      CallbacksHandle,
-      runBeforeNonSkippedPass(HasNameRegex("MockPassHandle"), HasName("test")))
-      .InSequence(PISequence);
-  EXPECT_CALL(
-      CallbacksHandle,
-      runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("test")))
-      .InSequence(PISequence);
-  EXPECT_CALL(
-      CallbacksHandle,
-      runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), HasName("test")))
-      .InSequence(PISequence);
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPass(HasNameRegex("MockPassHandle"), HasName("test"), _))
-      .InSequence(PISequence);
-  EXPECT_CALL(
-      CallbacksHandle,
-      runBeforeSkippedPass(HasNameRegex("MockPassHandle"), HasName("test")))
-      .Times(0);
-
-  EXPECT_CALL(AnalysisHandle, run(HasName("test"), _));
-  EXPECT_CALL(PassHandle, run(HasName("test"), _)).WillOnce(&getAnalysisResult);
-
-  StringRef PipelineText = "test-transform";
-  ASSERT_THAT_ERROR(PB->parsePassPipeline(MPM, PipelineText), Succeeded())
-      << "Pipeline was: " << PipelineText;
-  MPM.run(*M, MAM);
-}
-
-TEST_F(MachineFunctionCallbacksTest, InstrumentedSkippedPasses) {
-  CallbacksHandle.registerPassInstrumentation(PIC);
-  // Non-mock instrumentation run here can safely be ignored.
-  CallbacksHandle.ignoreNonMockPassInstrumentation("test");
-  CallbacksHandle.ignoreNonMockPassInstrumentation("module");
-
-  // Skip the pass by returning false.
-  EXPECT_CALL(CallbacksHandle,
-              runBeforePass(HasNameRegex("MockPassHandle"), HasName("test")))
-      .WillOnce(Return(false));
-
-  EXPECT_CALL(
-      CallbacksHandle,
-      runBeforeSkippedPass(HasNameRegex("MockPassHandle"), HasName("test")))
-      .Times(1);
-
-  EXPECT_CALL(AnalysisHandle, run(HasName("test"), _)).Times(0);
-  EXPECT_CALL(PassHandle, run(HasName("test"), _)).Times(0);
-
-  // As the pass is skipped there is no afterPass, beforeAnalysis/afterAnalysis
-  // as well.
-  EXPECT_CALL(CallbacksHandle,
-              runBeforeNonSkippedPass(HasNameRegex("MockPassHandle"), _))
-      .Times(0);
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPass(HasNameRegex("MockPassHandle"), _, _))
-      .Times(0);
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPassInvalidated(HasNameRegex("MockPassHandle"), _))
-      .Times(0);
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPass(HasNameRegex("MockPassHandle"), _, _))
-      .Times(0);
-  EXPECT_CALL(CallbacksHandle,
-              runBeforeAnalysis(HasNameRegex("MockAnalysisHandle"), _))
-      .Times(0);
-  EXPECT_CALL(CallbacksHandle,
-              runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), _))
-      .Times(0);
-
-  StringRef PipelineText = "test-transform";
-  ASSERT_THAT_ERROR(PB->parsePassPipeline(MPM, PipelineText), Succeeded())
-      << "Pipeline was: " << PipelineText;
-  MPM.run(*M, MAM);
-}
-
-// Check that the Module -> MachineFunction adaptor properly calls
-// runAfterPassInvalidated.
-TEST_F(MachineFunctionCallbacksTest, InstrumentedFreeMFPass) {
-  CallbacksHandle.registerPassInstrumentation(PIC);
-  // Non-mock instrumentation run here can safely be ignored.
-  CallbacksHandle.ignoreNonMockPassInstrumentation("test");
-  CallbacksHandle.ignoreNonMockPassInstrumentation("module");
-
-  ::testing::Sequence PISequence;
-  EXPECT_CALL(
-      CallbacksHandle,
-      runBeforePass(HasNameRegex("FreeMachineFunctionPass"), HasName("test")))
-      .InSequence(PISequence)
-      .WillOnce(Return(true));
-  EXPECT_CALL(CallbacksHandle,
-              runBeforeNonSkippedPass(HasNameRegex("FreeMachineFunctionPass"),
-                                      HasName("test")))
-      .InSequence(PISequence);
-  EXPECT_CALL(CallbacksHandle, runAfterPassInvalidated(
-                                   HasNameRegex("FreeMachineFunctionPass"), _))
-      .InSequence(PISequence);
-
-  // runAfterPass should not be called since the MachineFunction is no longer
-  // valid after FreeMachineFunctionPass.
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPass(HasNameRegex("FreeMachineFunctionPass"), _, _))
-      .Times(0);
-
-  MPM.addPass(
-      createModuleToMachineFunctionPassAdaptor(FreeMachineFunctionPass()));
-  MPM.run(*M, MAM);
-}
-
-// Check that the Module -> MachineFunction adaptor and MachineFunction pass
-// manager properly call runAfterPassInvalidated.
-TEST_F(MachineFunctionCallbacksTest, InstrumentedFreeMFPass2) {
-  CallbacksHandle.registerPassInstrumentation(PIC);
-  // Non-mock instrumentation run here can safely be ignored.
-  CallbacksHandle.ignoreNonMockPassInstrumentation("test");
-  CallbacksHandle.ignoreNonMockPassInstrumentation("module");
-
-  ::testing::Sequence PISequence;
-  EXPECT_CALL(
-      CallbacksHandle,
-      runBeforePass(HasNameRegex("FreeMachineFunctionPass"), HasName("test")))
-      .InSequence(PISequence)
-      .WillOnce(Return(true));
-  EXPECT_CALL(CallbacksHandle,
-              runBeforeNonSkippedPass(HasNameRegex("FreeMachineFunctionPass"),
-                                      HasName("test")))
-      .InSequence(PISequence);
-  EXPECT_CALL(CallbacksHandle, runAfterPassInvalidated(
-                                   HasNameRegex("FreeMachineFunctionPass"), _))
-      .InSequence(PISequence);
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPassInvalidated(HasNameRegex("PassManager"), _))
-      .InSequence(PISequence);
-
-  // runAfterPass should not be called since the MachineFunction is no longer
-  // valid after FreeMachineFunctionPass.
-  EXPECT_CALL(CallbacksHandle,
-              runAfterPass(HasNameRegex("FreeMachineFunctionPass"), _, _))
-      .Times(0);
-  EXPECT_CALL(CallbacksHandle, runAfterPass(HasNameRegex("PassManager"), _, _))
-      .Times(0);
-
-  MachineFunctionPassManager MFPM;
-  MFPM.addPass(FreeMachineFunctionPass());
-  MPM.addPass(createModuleToMachineFunctionPassAdaptor(std::move(MFPM)));
-  MPM.run(*M, MAM);
-}
-
-} // end anonymous namespace

From b3291793f11924a3b62601aabebebdcfbb12a9a1 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang@intel.com>
Date: Tue, 30 Apr 2024 10:09:41 +0800
Subject: [PATCH 13/65] [X86] Enable EVEX512 when host CPU has AVX512 (#90479)

This is used when -march=native run on an unknown CPU to old version of
LLVM.
---
 llvm/lib/TargetParser/Host.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 0a93b06f40c248..e911d15498545c 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1287,8 +1287,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(X86::FEATURE_AVX2);
   if (HasLeaf7 && ((EBX >> 8) & 1))
     setFeature(X86::FEATURE_BMI2);
-  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
     setFeature(X86::FEATURE_AVX512F);
+    setFeature(X86::FEATURE_EVEX512);
+  }
   if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
     setFeature(X86::FEATURE_AVX512DQ);
   if (HasLeaf7 && ((EBX >> 19) & 1))
@@ -1799,6 +1801,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
   // AVX512 is only supported if the OS supports the context save for it.
   Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
+  Features["evex512"]    = Features["avx512f"];
   Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
   Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
   Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);

From 9d5411ffba0d94b60050cc873773935addca9533 Mon Sep 17 00:00:00 2001
From: sinan <sinan.lin@linux.alibaba.com>
Date: Tue, 30 Apr 2024 11:27:18 +0800
Subject: [PATCH 14/65] [BOLT] Avoid reference updates for non-JT symbol
 operands (#88838)

Skip updating references for operands that do not directly
refer to jump table symbols but fall within a jump table's
address range to prevent unintended modifications.
---
 bolt/lib/Passes/ValidateMemRefs.cpp        |  8 +--
 bolt/test/X86/jt-symbol-disambiguation-4.s | 60 ++++++++++++++++++++++
 2 files changed, 64 insertions(+), 4 deletions(-)
 create mode 100644 bolt/test/X86/jt-symbol-disambiguation-4.s

diff --git a/bolt/lib/Passes/ValidateMemRefs.cpp b/bolt/lib/Passes/ValidateMemRefs.cpp
index f29a97c43f497c..ca58493b279c9e 100644
--- a/bolt/lib/Passes/ValidateMemRefs.cpp
+++ b/bolt/lib/Passes/ValidateMemRefs.cpp
@@ -29,8 +29,7 @@ bool ValidateMemRefs::checkAndFixJTReference(BinaryFunction &BF, MCInst &Inst,
   if (!BD)
     return false;
 
-  const uint64_t TargetAddress = BD->getAddress() + Offset;
-  JumpTable *JT = BC.getJumpTableContainingAddress(TargetAddress);
+  JumpTable *JT = BC.getJumpTableContainingAddress(BD->getAddress());
   if (!JT)
     return false;
 
@@ -43,8 +42,9 @@ bool ValidateMemRefs::checkAndFixJTReference(BinaryFunction &BF, MCInst &Inst,
   // the jump table label with a regular rodata reference. Get a
   // non-JT reference by fetching the symbol 1 byte before the JT
   // label.
-  MCSymbol *NewSym = BC.getOrCreateGlobalSymbol(TargetAddress - 1, "DATAat");
-  BC.MIB->setOperandToSymbolRef(Inst, OperandNum, NewSym, 1, &*BC.Ctx, 0);
+  MCSymbol *NewSym = BC.getOrCreateGlobalSymbol(BD->getAddress() - 1, "DATAat");
+  BC.MIB->setOperandToSymbolRef(Inst, OperandNum, NewSym, Offset + 1, &*BC.Ctx,
+                                0);
   LLVM_DEBUG(dbgs() << "BOLT-DEBUG: replaced reference @" << BF.getPrintName()
                     << " from " << BD->getName() << " to " << NewSym->getName()
                     << " + 1\n");
diff --git a/bolt/test/X86/jt-symbol-disambiguation-4.s b/bolt/test/X86/jt-symbol-disambiguation-4.s
new file mode 100644
index 00000000000000..816b0fb501a590
--- /dev/null
+++ b/bolt/test/X86/jt-symbol-disambiguation-4.s
@@ -0,0 +1,60 @@
+# If the operand references a symbol that differs from the jump table label,
+# no reference updating is required even if its target address resides within
+# the jump table's range.
+# In this test case, consider the second instruction within the main function,
+# where the address resulting from 'c + 17' corresponds to one byte beyond the
+# address of the .LJTI2_0 jump table label. However, this operand represents
+# an offset calculation related to the global variable 'c' and should remain
+# unaffected by the jump table.
+
+# REQUIRES: system-linux
+
+
+# RUN: %clang -no-pie  %s -o %t.exe -Wl,-q
+
+# RUN: %t.exe
+# RUN: llvm-bolt -funcs=main,foo/1 %t.exe -o %t.exe.bolt -jump-tables=move
+# RUN: %t.exe.bolt
+
+	.text
+	.globl	main
+	.type	main,@function
+main:
+	pushq   %rbp
+	movq	%rsp, %rbp
+	movq	$-16, %rax
+	movl	c+17(%rax), %edx
+	cmpl	$255, %edx
+	je	.LCorrect
+	movl	$1, %eax
+	popq	%rbp
+	ret
+.LCorrect:
+	movl	$0, %eax
+	popq	%rbp
+	ret
+	.p2align	4, 0x90
+	.type	foo,@function
+foo:
+	movq	$0, %rax
+	jmpq	*.LJTI2_0(,%rax,8)
+	addl	$-36, %eax
+.LBB2_2:
+	addl	$-16, %eax
+	retq
+	.section	.rodata,"a",@progbits
+	.type	c,@object
+	.data
+	.globl	c
+	.p2align	4, 0x0
+c:
+	.byte 1
+  .byte 0xff
+	.zero	14
+	.size	c, 16
+.LJTI2_0:
+	.quad	.LBB2_2
+	.quad	.LBB2_2
+	.quad	.LBB2_2
+	.quad	.LBB2_2
+

From 38067c50a9459caed2892e38b2ae5026a8bff8e2 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 10:59:37 +0800
Subject: [PATCH 15/65] [C++20] [Modules] [Reduced BMI] Avoid force writing
 static declarations within module purview

Close https://github.com/llvm/llvm-project/issues/90259

Technically, the static declarations shouldn't be leaked from the module
interface, otherwise it is an illegal program according to the spec. So
we can get rid of the static declarations from the reduced BMI
technically. Then we can close the above issue.

However, there are too many `static inline` codes in existing headers.
So it will be a pretty big breaking change if we do this globally.
---
 clang/lib/Serialization/ASTWriter.cpp | 36 ++++++++++++++++++++--
 clang/test/Modules/pr90259.cppm       | 44 +++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Modules/pr90259.cppm

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 0408eeb6a95b00..7db60c67d71234 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3205,6 +3205,17 @@ void ASTWriter::WriteType(QualType T) {
 // Declaration Serialization
 //===----------------------------------------------------------------------===//
 
+static bool IsInternalDeclFromFileContext(const Decl *D) {
+  auto *ND = dyn_cast<NamedDecl>(D);
+  if (!ND)
+    return false;
+
+  if (!D->getDeclContext()->getRedeclContext()->isFileContext())
+    return false;
+
+  return ND->getFormalLinkage() == Linkage::Internal;
+}
+
 /// Write the block containing all of the declaration IDs
 /// lexically declared within the given DeclContext.
 ///
@@ -3225,6 +3236,15 @@ uint64_t ASTWriter::WriteDeclContextLexicalBlock(ASTContext &Context,
     if (DoneWritingDeclsAndTypes && !wasDeclEmitted(D))
       continue;
 
+    // We don't need to write decls with internal linkage into reduced BMI.
+    // If such decls gets emitted due to it get used from inline functions,
+    // the program illegal. However, there are too many use of static inline
+    // functions in the global module fragment and it will be breaking change
+    // to forbid that. So we have to allow to emit such declarations from GMF.
+    if (GeneratingReducedBMI && !D->isFromExplicitGlobalModule() &&
+        IsInternalDeclFromFileContext(D))
+      continue;
+
     KindDeclPairs.push_back(D->getKind());
     KindDeclPairs.push_back(GetDeclRef(D).get());
   }
@@ -3886,6 +3906,13 @@ class ASTDeclContextNameLookupTrait {
           !Writer.wasDeclEmitted(DeclForLocalLookup))
         continue;
 
+      // Try to avoid writing internal decls to reduced BMI.
+      // See comments in ASTWriter::WriteDeclContextLexicalBlock for details.
+      if (Writer.isGeneratingReducedBMI() &&
+          !DeclForLocalLookup->isFromExplicitGlobalModule() &&
+          IsInternalDeclFromFileContext(DeclForLocalLookup))
+        continue;
+
       DeclIDs.push_back(Writer.GetDeclRef(DeclForLocalLookup));
     }
     return std::make_pair(Start, DeclIDs.size());
@@ -4257,6 +4284,12 @@ uint64_t ASTWriter::WriteDeclContextVisibleBlock(ASTContext &Context,
         if (DoneWritingDeclsAndTypes && !wasDeclEmitted(ND))
           continue;
 
+        // We don't need to force emitting internal decls into reduced BMI.
+        // See comments in ASTWriter::WriteDeclContextLexicalBlock for details.
+        if (GeneratingReducedBMI && !ND->isFromExplicitGlobalModule() &&
+            IsInternalDeclFromFileContext(ND))
+          continue;
+
         GetDeclRef(ND);
       }
     }
@@ -4917,8 +4950,7 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) {
       // is ill-formed. However, in practice, there are a lot of projects
       // uses `static inline` in the headers. So we can't get rid of all
       // static entities in reduced BMI now.
-      if (auto *ND = dyn_cast<NamedDecl>(D);
-          ND && ND->getFormalLinkage() == Linkage::Internal)
+      if (IsInternalDeclFromFileContext(D))
         continue;
     }
 
diff --git a/clang/test/Modules/pr90259.cppm b/clang/test/Modules/pr90259.cppm
new file mode 100644
index 00000000000000..17786998a2a729
--- /dev/null
+++ b/clang/test/Modules/pr90259.cppm
@@ -0,0 +1,44 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/mod1.cppm -emit-reduced-module-interface -o %t/mod-mod1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/mod.cppm -fprebuilt-module-path=%t  \
+// RUN:     -emit-reduced-module-interface -o %t/mod.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use.cpp -fprebuilt-module-path=%t -verify -fsyntax-only
+
+//--- mod1.cppm
+export module mod:mod1;
+namespace {
+    int abc = 43;
+}
+namespace mod {
+    static int def = 44;
+}
+export int f() {
+    return abc + mod::def;
+}
+
+//--- mod.cppm
+// expected-no-diagnostics
+export module mod;
+import :mod1;
+
+namespace {
+    double abc = 43.0;
+}
+
+namespace mod {
+    static double def = 44.0;
+}
+
+export double func() {
+    return (double)f() + abc + mod::def;
+}
+
+//--- use.cpp
+// expected-no-diagnostics
+import mod;
+double use() {
+    return func();
+}

From 62d6560471f0e1151e34c0a56357423350f7a6af Mon Sep 17 00:00:00 2001
From: thetruestblue <92476612+thetruestblue@users.noreply.github.com>
Date: Mon, 29 Apr 2024 21:50:57 -0700
Subject: [PATCH 16/65] Disable test for lsan and x86_64h (#90483)

Disable this test on x86_64h for LSan.

This test is failing with malformed object only on x86_64h.
Disabling for now.

rdar://125052424
---
 .../TestCases/sanitizer_coverage_trace_pc_guard.cpp             | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cpp b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cpp
index ed817961c688cc..e46c2edac4cea1 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/sanitizer_coverage_trace_pc_guard.cpp
@@ -2,6 +2,8 @@
 
 // REQUIRES: has_sancovcc
 // UNSUPPORTED: ubsan,i386-darwin,target={{(powerpc64|s390x|thumb).*}}
+// This test is failing for lsan on darwin on x86_64h.
+// UNSUPPORTED: x86_64h && lsan && darwin
 // XFAIL: tsan
 // XFAIL: android && asan
 

From 326667d727546dad0ce9315aa93a3da698c7c71c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 29 Apr 2024 17:45:50 -0700
Subject: [PATCH 17/65] [RISCV] Merge variable declaration with first
 assignment. NFC

---
 llvm/lib/TargetParser/RISCVISAInfo.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 20182fb06037c2..24b774314fea84 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -398,11 +398,10 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
 
   for (auto &Feature : Features) {
     StringRef ExtName = Feature;
-    bool Experimental = false;
     assert(ExtName.size() > 1 && (ExtName[0] == '+' || ExtName[0] == '-'));
     bool Add = ExtName[0] == '+';
     ExtName = ExtName.drop_front(1); // Drop '+' or '-'
-    Experimental = stripExperimentalPrefix(ExtName);
+    bool Experimental = stripExperimentalPrefix(ExtName);
     auto ExtensionInfos = Experimental
                               ? ArrayRef(SupportedExperimentalExtensions)
                               : ArrayRef(SupportedExtensions);

From 79095b4079e8d4f8176bcc53fdacd2765f310cdb Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Mon, 29 Apr 2024 22:05:35 -0700
Subject: [PATCH 18/65] [ELF] --compress-debug-sections=zstd: replace
 ZSTD_c_nbWorkers parallelism with multi-frame parallelism

https://reviews.llvm.org/D133679 utilizes zstd's multithread API to
create one single frame. This provides a higher compression ratio but is
significantly slower than concatenating multiple frames.

With manual parallelism, it is easier to parallelize memcpy in
OutputSection::writeTo for parallel memcpy.

In addition, as the individual allocated decompression buffers are much
smaller, we can make a wild guess (compressed_size/4) without worrying
about a resize (due to wrong guess) would waste memory.
---
 lld/ELF/OutputSections.cpp | 133 ++++++++++++++++---------------------
 1 file changed, 59 insertions(+), 74 deletions(-)

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index eadab9d745d687..3e58ed4bda2d3c 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -364,50 +364,43 @@ template <class ELFT> void OutputSection::maybeCompress() {
   // useful when there are many compressed output sections.
   addralign = 1;
 
+  // Split input into 1-MiB shards.
+  [[maybe_unused]] constexpr size_t shardSize = 1 << 20;
+  auto shardsIn = split(ArrayRef<uint8_t>(buf.get(), size), shardSize);
+  const size_t numShards = shardsIn.size();
+  compressed.numShards = numShards;
+  auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
+
 #if LLVM_ENABLE_ZSTD
   // Use ZSTD's streaming compression API which permits parallel workers working
   // on the stream. See http://facebook.github.io/zstd/zstd_manual.html
   // "Streaming compression - HowTo".
   if (ctype == DebugCompressionType::Zstd) {
-    // Allocate a buffer of half of the input size, and grow it by 1.5x if
-    // insufficient.
-    compressed.type = ELFCOMPRESS_ZSTD;
-    compressed.shards = std::make_unique<SmallVector<uint8_t, 0>[]>(1);
-    SmallVector<uint8_t, 0> &out = compressed.shards[0];
-    out.resize_for_overwrite(std::max<size_t>(size / 2, 32));
-    size_t pos = 0;
-
-    ZSTD_CCtx *cctx = ZSTD_createCCtx();
-    // Ignore error if zstd was not built with ZSTD_MULTITHREAD.
-    (void)ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers,
-                                 parallel::strategy.compute_thread_count());
-    ZSTD_outBuffer zob = {out.data(), out.size(), 0};
-    ZSTD_EndDirective directive = ZSTD_e_continue;
-    const size_t blockSize = ZSTD_CStreamInSize();
-    do {
-      const size_t n = std::min(static_cast<size_t>(size - pos), blockSize);
-      if (n == size - pos)
-        directive = ZSTD_e_end;
-      ZSTD_inBuffer zib = {buf.get() + pos, n, 0};
-      size_t bytesRemaining = 0;
-      while (zib.pos != zib.size ||
-             (directive == ZSTD_e_end && bytesRemaining != 0)) {
+    parallelFor(0, numShards, [&](size_t i) {
+      SmallVector<uint8_t, 0> out;
+      ZSTD_CCtx *cctx = ZSTD_createCCtx();
+      ZSTD_inBuffer zib = {shardsIn[i].data(), shardsIn[i].size(), 0};
+      ZSTD_outBuffer zob = {nullptr, 0, 0};
+      size_t size;
+      do {
+        // Allocate a buffer of half of the input size, and grow it by 1.5x if
+        // insufficient.
         if (zob.pos == zob.size) {
-          out.resize_for_overwrite(out.size() * 3 / 2);
-          zob.dst = out.data();
-          zob.size = out.size();
+          out.resize_for_overwrite(
+              zob.size ? zob.size * 3 / 2 : std::max<size_t>(zib.size / 4, 64));
+          zob = {out.data(), out.size(), zob.pos};
         }
-        bytesRemaining = ZSTD_compressStream2(cctx, &zob, &zib, directive);
-        assert(!ZSTD_isError(bytesRemaining));
-      }
-      pos += n;
-    } while (directive != ZSTD_e_end);
-    out.resize(zob.pos);
-    ZSTD_freeCCtx(cctx);
-
-    size = sizeof(Elf_Chdr) + out.size();
-    flags |= SHF_COMPRESSED;
-    return;
+        size = ZSTD_compressStream2(cctx, &zob, &zib, ZSTD_e_end);
+        assert(!ZSTD_isError(size));
+      } while (size != 0);
+      out.truncate(zob.pos);
+      ZSTD_freeCCtx(cctx);
+      shardsOut[i] = std::move(out);
+    });
+    compressed.type = ELFCOMPRESS_ZSTD;
+    size = sizeof(Elf_Chdr);
+    for (size_t i = 0; i != numShards; ++i)
+      size += shardsOut[i].size();
   }
 #endif
 
@@ -417,37 +410,32 @@ template <class ELFT> void OutputSection::maybeCompress() {
   // ~15%. We found that level 7 to 9 doesn't make much difference (~1% more
   // compression) while they take significant amount of time (~2x), so level 6
   // seems enough.
-  const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
-
-  // Split input into 1-MiB shards.
-  constexpr size_t shardSize = 1 << 20;
-  auto shardsIn = split(ArrayRef<uint8_t>(buf.get(), size), shardSize);
-  const size_t numShards = shardsIn.size();
-
-  // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all
-  // shards but the last to flush the output to a byte boundary to be
-  // concatenated with the next shard.
-  auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
-  auto shardsAdler = std::make_unique<uint32_t[]>(numShards);
-  parallelFor(0, numShards, [&](size_t i) {
-    shardsOut[i] = deflateShard(shardsIn[i], level,
-                                i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH);
-    shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size());
-  });
+  if (ctype == DebugCompressionType::Zlib) {
+    const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
+
+    // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all
+    // shards but the last to flush the output to a byte boundary to be
+    // concatenated with the next shard.
+    auto shardsAdler = std::make_unique<uint32_t[]>(numShards);
+    parallelFor(0, numShards, [&](size_t i) {
+      shardsOut[i] = deflateShard(shardsIn[i], level,
+                                  i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH);
+      shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size());
+    });
 
-  // Update section size and combine Alder-32 checksums.
-  uint32_t checksum = 1;       // Initial Adler-32 value
-  size = sizeof(Elf_Chdr) + 2; // Elf_Chdir and zlib header
-  for (size_t i = 0; i != numShards; ++i) {
-    size += shardsOut[i].size();
-    checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size());
+    // Update section size and combine Alder-32 checksums.
+    uint32_t checksum = 1;       // Initial Adler-32 value
+    size = sizeof(Elf_Chdr) + 2; // Elf_Chdir and zlib header
+    for (size_t i = 0; i != numShards; ++i) {
+      size += shardsOut[i].size();
+      checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size());
+    }
+    size += 4; // checksum
+    compressed.type = ELFCOMPRESS_ZLIB;
+    compressed.checksum = checksum;
   }
-  size += 4; // checksum
 
-  compressed.type = ELFCOMPRESS_ZLIB;
   compressed.shards = std::move(shardsOut);
-  compressed.numShards = numShards;
-  compressed.checksum = checksum;
   flags |= SHF_COMPRESSED;
 #endif
 }
@@ -479,25 +467,22 @@ void OutputSection::writeTo(uint8_t *buf, parallel::TaskGroup &tg) {
     chdr->ch_size = compressed.uncompressedSize;
     chdr->ch_addralign = addralign;
     buf += sizeof(*chdr);
-    if (compressed.type == ELFCOMPRESS_ZSTD) {
-      memcpy(buf, compressed.shards[0].data(), compressed.shards[0].size());
-      return;
+
+    auto offsets = std::make_unique<size_t[]>(compressed.numShards);
+    if (compressed.type == ELFCOMPRESS_ZLIB) {
+      buf[0] = 0x78;  // CMF
+      buf[1] = 0x01;  // FLG: best speed
+      offsets[0] = 2; // zlib header
+      write32be(buf + (size - sizeof(*chdr) - 4), compressed.checksum);
     }
 
     // Compute shard offsets.
-    auto offsets = std::make_unique<size_t[]>(compressed.numShards);
-    offsets[0] = 2; // zlib header
     for (size_t i = 1; i != compressed.numShards; ++i)
       offsets[i] = offsets[i - 1] + compressed.shards[i - 1].size();
-
-    buf[0] = 0x78; // CMF
-    buf[1] = 0x01; // FLG: best speed
     parallelFor(0, compressed.numShards, [&](size_t i) {
       memcpy(buf + offsets[i], compressed.shards[i].data(),
              compressed.shards[i].size());
     });
-
-    write32be(buf + (size - sizeof(*chdr) - 4), compressed.checksum);
     return;
   }
 

From fbe4d991323b026eb64cd3d0ee811854b54ca33f Mon Sep 17 00:00:00 2001
From: Julian Schmidt <git.julian.schmidt@gmail.com>
Date: Tue, 30 Apr 2024 07:22:30 +0200
Subject: [PATCH 19/65] [clang-tidy] fix false-negative for macros in
 `readability-math-missing-parentheses` (#90279)

When a binary operator is the last operand of a macro, the end location
that is past the `BinaryOperator` will be inside the macro and therefore
an
invalid location to insert a `FixIt` into, which is why the check bails
when encountering such a pattern.
However, the end location is only required for the `FixIt` and the
diagnostic can still be emitted, just without an attached fix.
---
 .../MathMissingParenthesesCheck.cpp           | 16 ++++++++------
 .../readability/math-missing-parentheses.cpp  | 22 +++++++++++++++++++
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp
index d1e20b9074cec1..65fd296094915b 100644
--- a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp
@@ -61,19 +61,21 @@ static void addParantheses(const BinaryOperator *BinOp,
     const clang::SourceLocation StartLoc = BinOp->getBeginLoc();
     const clang::SourceLocation EndLoc =
         clang::Lexer::getLocForEndOfToken(BinOp->getEndLoc(), 0, SM, LangOpts);
-    if (EndLoc.isInvalid())
-      return;
 
-    Check->diag(StartLoc,
-                "'%0' has higher precedence than '%1'; add parentheses to "
-                "explicitly specify the order of operations")
+    auto Diag =
+        Check->diag(StartLoc,
+                    "'%0' has higher precedence than '%1'; add parentheses to "
+                    "explicitly specify the order of operations")
         << (Precedence1 > Precedence2 ? BinOp->getOpcodeStr()
                                       : ParentBinOp->getOpcodeStr())
         << (Precedence1 > Precedence2 ? ParentBinOp->getOpcodeStr()
                                       : BinOp->getOpcodeStr())
-        << FixItHint::CreateInsertion(StartLoc, "(")
-        << FixItHint::CreateInsertion(EndLoc, ")")
         << SourceRange(StartLoc, EndLoc);
+
+    if (EndLoc.isValid()) {
+      Diag << FixItHint::CreateInsertion(StartLoc, "(")
+           << FixItHint::CreateInsertion(EndLoc, ")");
+    }
   }
 
   addParantheses(dyn_cast<BinaryOperator>(BinOp->getLHS()->IgnoreImpCasts()),
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp
index edbe2e1c37c770..a6045c079a4823 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp
@@ -16,6 +16,13 @@ int bar(){
     return 4;
 }
 
+int sink(int);
+#define FUN(ARG) (sink(ARG))
+#define FUN2(ARG) sink((ARG))
+#define FUN3(ARG) sink(ARG)
+#define FUN4(ARG) sink(1 + ARG)
+#define FUN5(ARG) sink(4 * ARG)
+
 class fun{
 public:
     int A;
@@ -117,4 +124,19 @@ void f(){
     //CHECK-MESSAGES: :[[@LINE+2]]:94: warning: '/' has higher precedence than '-'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses]
     //CHECK-FIXES: int q = (1 MACRO_ADD (2 MACRO_MULTIPLY 3)) MACRO_OR ((4 MACRO_AND 5) MACRO_XOR (6 MACRO_SUBTRACT (7 MACRO_DIVIDE 8)));
     int q = 1 MACRO_ADD 2 MACRO_MULTIPLY 3 MACRO_OR 4 MACRO_AND 5 MACRO_XOR 6 MACRO_SUBTRACT 7 MACRO_DIVIDE 8; // No warning
+
+    //CHECK-MESSAGES: :[[@LINE+1]]:21: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses]
+    int r = FUN(0 + 1 * 2);
+
+    //CHECK-MESSAGES: :[[@LINE+1]]:22: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses]
+    int s = FUN2(0 + 1 * 2);
+
+    //CHECK-MESSAGES: :[[@LINE+1]]:22: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses]
+    int t = FUN3(0 + 1 * 2);
+
+    //CHECK-MESSAGES: :[[@LINE+1]]:18: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses]
+    int u = FUN4(1 * 2);
+
+    //CHECK-MESSAGES: :[[@LINE+1]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses]
+    int v = FUN5(0 + 1);
 }

From bd72f7b0ab98531ab579fafe79c7a8967994583a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Tue, 30 Apr 2024 13:23:24 +0800
Subject: [PATCH 20/65] [RISCV] Add test case for exact vscale miscompile in
 #90559. NFC

---
 llvm/test/CodeGen/RISCV/rvv/pr90559.ll | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/pr90559.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
new file mode 100644
index 00000000000000..93a251286cf73f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+; FIXME: The i32 load and store pair isn't dead and shouldn't be omitted.
+define void @f(ptr %p) vscale_range(2,2) {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    addi a1, a0, 80
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vs1r.v v8, (a1)
+; CHECK-NEXT:    addi a0, a0, 64
+; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %q = getelementptr inbounds i8, ptr %p, i64 84
+  %x = load i32, ptr %q
+  call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 96, i1 false)
+  store i32 %x, ptr %q
+  ret void
+}

From 18268ac0f48d93c2bcddb69732761971669c09ab Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 13:28:52 +0800
Subject: [PATCH 21/65] [NFC] [C++20] [Modules] Use new class
 CXX20ModulesGenerator to generate module file for C++20 modules instead of
 PCHGenerator

Previously we're re-using PCHGenerator to generate the module file for
C++20 modules. But this is slighty more or less odd. This patch tries
to use a new class 'CXX20ModulesGenerator' to generate the module file
for C++20 modules.
---
 clang/include/clang/Serialization/ASTWriter.h | 19 ++++++++++++++++---
 clang/lib/Frontend/FrontendActions.cpp        | 12 ++++--------
 clang/lib/Serialization/GeneratePCH.cpp       | 19 ++++++++-----------
 clang/test/Modules/pr67893.cppm               |  2 +-
 clang/test/Modules/search-partitions.cpp      |  8 +++-----
 5 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 6c45b7348b8552..4e433deaaf2dbc 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -928,17 +928,30 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class ReducedBMIGenerator : public PCHGenerator {
+class CXX20ModulesGenerator : public PCHGenerator {
 protected:
   virtual Module *getEmittingModule(ASTContext &Ctx) override;
 
+  CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                        StringRef OutputFile, bool GeneratingReducedBMI);
+
 public:
-  ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
-                      StringRef OutputFile);
+  CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                        StringRef OutputFile)
+      : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+                              /*GeneratingReducedBMI=*/false) {}
 
   void HandleTranslationUnit(ASTContext &Ctx) override;
 };
 
+class ReducedBMIGenerator : public CXX20ModulesGenerator {
+public:
+  ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                      StringRef OutputFile)
+      : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+                              /*GeneratingReducedBMI=*/true) {}
+};
+
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 04eb1041326713..454653a31534cd 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,14 +272,10 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr<ASTConsumer>
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) {
-  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
-  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
-
-  std::vector<std::unique_ptr<ASTConsumer>> Consumers =
-      CreateMultiplexConsumer(CI, InFile);
-  if (Consumers.empty())
-    return nullptr;
+  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+  Consumers.push_back(std::make_unique<CXX20ModulesGenerator>(
+      CI.getPreprocessor(), CI.getModuleCache(),
+      CI.getFrontendOpts().OutputFile));
 
   if (CI.getFrontendOpts().GenReducedBMI &&
       !CI.getFrontendOpts().ModuleOutputPath.empty()) {
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index bed74399098d7f..7b97b73f7bbd00 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,31 +88,28 @@ ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() {
   return &Writer;
 }
 
-ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor &PP,
-                                         InMemoryModuleCache &ModuleCache,
-                                         StringRef OutputFile)
+CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor &PP,
+                                             InMemoryModuleCache &ModuleCache,
+                                             StringRef OutputFile,
+                                             bool GeneratingReducedBMI)
     : PCHGenerator(
           PP, ModuleCache, OutputFile, llvm::StringRef(),
           std::make_shared<PCHBuffer>(),
           /*Extensions=*/ArrayRef<std::shared_ptr<ModuleFileExtension>>(),
           /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/false,
           /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false,
-          /*GeneratingReducedBMI=*/true) {}
+          GeneratingReducedBMI) {}
 
-Module *ReducedBMIGenerator::getEmittingModule(ASTContext &Ctx) {
+Module *CXX20ModulesGenerator::getEmittingModule(ASTContext &Ctx) {
   Module *M = Ctx.getCurrentNamedModule();
   assert(M && M->isNamedModuleUnit() &&
-         "ReducedBMIGenerator should only be used with C++20 Named modules.");
+         "CXX20ModulesGenerator should only be used with C++20 Named modules.");
   return M;
 }
 
-void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) {
-  // We need to do this to make sure the size of reduced BMI not to be larger
-  // than full BMI.
-  //
+void CXX20ModulesGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   // FIMXE: We'd better to wrap such options to a new class ASTWriterOptions
   // since this is not about searching header really.
-  // FIXME2: We'd better to move the class writing full BMI with reduced BMI.
   HeaderSearchOptions &HSOpts =
       getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
diff --git a/clang/test/Modules/pr67893.cppm b/clang/test/Modules/pr67893.cppm
index 58990cec01d666..95479193f8ea2b 100644
--- a/clang/test/Modules/pr67893.cppm
+++ b/clang/test/Modules/pr67893.cppm
@@ -5,7 +5,7 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \
 // RUN:      -emit-module-interface -o %t/a.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \
-// RUN:      -emit-module-interface -fprebuilt-module-path=%t
+// RUN:      -emit-module-interface -fprebuilt-module-path=%t -o %t/m.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm  \
 // RUN:      -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm
 
diff --git a/clang/test/Modules/search-partitions.cpp b/clang/test/Modules/search-partitions.cpp
index 92732958db94e6..92f7c637c83384 100644
--- a/clang/test/Modules/search-partitions.cpp
+++ b/clang/test/Modules/search-partitions.cpp
@@ -11,7 +11,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/partition3.cpp \
 // RUN:  -o %t/A-Part3.pcm
 
-// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/moduleA.cpp \
+// RUN: %clang_cc1 -std=c++20 %t/moduleA.cpp -fsyntax-only -verify \
 // RUN:  -fprebuilt-module-path=%t
 
 // Test again with reduced BMI
@@ -28,9 +28,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition3.cpp \
 // RUN:  -o %t/A-Part3.pcm
 
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/moduleA.cpp -fprebuilt-module-path=%t 
-
-// expected-no-diagnostics
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %t/moduleA.cpp -fprebuilt-module-path=%t 
 
 //--- partition1.cpp
 export module A:Part1;
@@ -50,7 +48,7 @@ export module A:Part3;
 int part3();
 
 //--- moduleA.cpp
-
+// expected-no-diagnostics
 export module A;
 
 import :Part1;

From 705636a1130551ab105aec95b909a35a0305fc9f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 29 Apr 2024 22:44:24 -0700
Subject: [PATCH 22/65] [SelectionDAG][RISCV] Move VP_REDUCE* legalization to
 LegalizeDAG.cpp. (#90522)

LegalizeVectorType is responsible for legalizing nodes that perform an
operation on each element may need to scalarize.

This is not true for nodes like VP_REDUCE.*, BUILD_VECTOR,
SHUFFLE_VECTOR, EXTRACT_SUBVECTOR, etc.

This patch drops any nodes with a scalar result from LegalizeVectorOps
and handles them in LegalizeDAG instead.

This required moving the reduction promotion to LegalizeDAG. I have
removed the support integer promotion as it was incorrect for integer
min/max reductions. Since it was untested, it was best to assert on it
until it was really needed.

There are a couple regressions that can be fixed with a small DAG
combine which I will do as a follow up.
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 62 +++++++++++++++-
 .../SelectionDAG/LegalizeVectorOps.cpp        | 73 ++-----------------
 .../rvv/fixed-vectors-reduction-int-vp.ll     | 26 ++++---
 .../CodeGen/RISCV/rvv/vreductions-int-vp.ll   |  7 +-
 4 files changed, 85 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 46e54b5366d66a..398b5fee990b5d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -180,6 +180,13 @@ class SelectionDAGLegalize {
                              SmallVectorImpl<SDValue> &Results);
   SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
 
+  /// Implements vector reduce operation promotion.
+  ///
+  /// All vector operands are promoted to a vector type with larger element
+  /// type, and the start value is promoted to a larger scalar type. Then the
+  /// result is truncated back to the original scalar type.
+  void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
   SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
@@ -2979,6 +2986,47 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
   return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT));
 }
 
+void SelectionDAGLegalize::PromoteReduction(SDNode *Node,
+                                            SmallVectorImpl<SDValue> &Results) {
+  MVT VecVT = Node->getOperand(1).getSimpleValueType();
+  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+  MVT ScalarVT = Node->getSimpleValueType(0);
+  MVT NewScalarVT = NewVecVT.getVectorElementType();
+
+  SDLoc DL(Node);
+  SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+  // promote the initial value.
+  // FIXME: Support integer.
+  assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
+         "Only FP promotion is supported");
+  Operands[0] =
+      DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
+
+  for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+    if (Node->getOperand(j).getValueType().isVector() &&
+        !(ISD::isVPOpcode(Node->getOpcode()) &&
+          ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
+      // promote the vector operand.
+      // FIXME: Support integer.
+      assert(Node->getOperand(j).getValueType().isFloatingPoint() &&
+             "Only FP promotion is supported");
+      Operands[j] =
+          DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+    } else {
+      Operands[j] = Node->getOperand(j); // Skip VL operand.
+    }
+
+  SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
+                            Node->getFlags());
+
+  assert(ScalarVT.isFloatingPoint() && "Only FP promotion is supported");
+  Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
+                    DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+
+  Results.push_back(Res);
+}
+
 bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   LLVM_DEBUG(dbgs() << "Trying to expand node\n");
   SmallVector<SDValue, 8> Results;
@@ -4955,7 +5003,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
       Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
       Node->getOpcode() == ISD::STRICT_FSETCC ||
-      Node->getOpcode() == ISD::STRICT_FSETCCS)
+      Node->getOpcode() == ISD::STRICT_FSETCCS ||
+      Node->getOpcode() == ISD::VP_REDUCE_FADD ||
+      Node->getOpcode() == ISD::VP_REDUCE_FMUL ||
+      Node->getOpcode() == ISD::VP_REDUCE_FMAX ||
+      Node->getOpcode() == ISD::VP_REDUCE_FMIN ||
+      Node->getOpcode() == ISD::VP_REDUCE_SEQ_FADD)
     OVT = Node->getOperand(1).getSimpleValueType();
   if (Node->getOpcode() == ISD::BR_CC ||
       Node->getOpcode() == ISD::SELECT_CC)
@@ -5613,6 +5666,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
                     DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
     break;
   }
+  case ISD::VP_REDUCE_FADD:
+  case ISD::VP_REDUCE_FMUL:
+  case ISD::VP_REDUCE_FMAX:
+  case ISD::VP_REDUCE_FMIN:
+  case ISD::VP_REDUCE_SEQ_FADD:
+    PromoteReduction(Node, Results);
+    break;
   }
 
   // Replace the original node with the legalized result.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 8f87ee8e09393a..423df9ae6b2a55 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -176,13 +176,6 @@ class VectorLegalizer {
   /// truncated back to the original type.
   void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 
-  /// Implements vector reduce operation promotion.
-  ///
-  /// All vector operands are promoted to a vector type with larger element
-  /// type, and the start value is promoted to a larger scalar type. Then the
-  /// result is truncated back to the original scalar type.
-  void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
-
   /// Implements vector setcc operation promotion.
   ///
   /// All vector operands are promoted to a vector type with larger element
@@ -510,6 +503,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
       if (Action != TargetLowering::Legal)                                     \
         break;                                                                 \
     }                                                                          \
+    /* Defer non-vector results to LegalizeDAG. */                             \
+    if (!Node->getValueType(0).isVector()) {                                   \
+      Action = TargetLowering::Legal;                                          \
+      break;                                                                   \
+    }                                                                          \
     Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT);            \
   } break;
 #include "llvm/IR/VPIntrinsics.def"
@@ -580,50 +578,6 @@ bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
   return true;
 }
 
-void VectorLegalizer::PromoteReduction(SDNode *Node,
-                                       SmallVectorImpl<SDValue> &Results) {
-  MVT VecVT = Node->getOperand(1).getSimpleValueType();
-  MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
-  MVT ScalarVT = Node->getSimpleValueType(0);
-  MVT NewScalarVT = NewVecVT.getVectorElementType();
-
-  SDLoc DL(Node);
-  SmallVector<SDValue, 4> Operands(Node->getNumOperands());
-
-  // promote the initial value.
-  if (Node->getOperand(0).getValueType().isFloatingPoint())
-    Operands[0] =
-        DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
-  else
-    Operands[0] =
-        DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(0));
-
-  for (unsigned j = 1; j != Node->getNumOperands(); ++j)
-    if (Node->getOperand(j).getValueType().isVector() &&
-        !(ISD::isVPOpcode(Node->getOpcode()) &&
-          ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
-      // promote the vector operand.
-      if (Node->getOperand(j).getValueType().isFloatingPoint())
-        Operands[j] =
-            DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
-      else
-        Operands[j] =
-            DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j));
-    else
-      Operands[j] = Node->getOperand(j); // Skip VL operand.
-
-  SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
-                            Node->getFlags());
-
-  if (ScalarVT.isFloatingPoint())
-    Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
-                      DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
-  else
-    Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res);
-
-  Results.push_back(Res);
-}
-
 void VectorLegalizer::PromoteSETCC(SDNode *Node,
                                    SmallVectorImpl<SDValue> &Results) {
   MVT VecVT = Node->getOperand(0).getSimpleValueType();
@@ -708,23 +662,6 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
     // Promote the operation by extending the operand.
     PromoteFP_TO_INT(Node, Results);
     return;
-  case ISD::VP_REDUCE_ADD:
-  case ISD::VP_REDUCE_MUL:
-  case ISD::VP_REDUCE_AND:
-  case ISD::VP_REDUCE_OR:
-  case ISD::VP_REDUCE_XOR:
-  case ISD::VP_REDUCE_SMAX:
-  case ISD::VP_REDUCE_SMIN:
-  case ISD::VP_REDUCE_UMAX:
-  case ISD::VP_REDUCE_UMIN:
-  case ISD::VP_REDUCE_FADD:
-  case ISD::VP_REDUCE_FMUL:
-  case ISD::VP_REDUCE_FMAX:
-  case ISD::VP_REDUCE_FMIN:
-  case ISD::VP_REDUCE_SEQ_FADD:
-    // Promote the operation by extending the operand.
-    PromoteReduction(Node, Results);
-    return;
   case ISD::VP_SETCC:
   case ISD::SETCC:
     // Promote the operation by extending the operand.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 02a989a9699606..b874a4477f5d17 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -802,25 +802,27 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1>
 ; CHECK-LABEL: vpreduce_xor_v64i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vslidedown.vi v24, v0, 4
-; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:    bltu a1, a3, .LBB49_2
+; CHECK-NEXT:    addi a2, a1, -32
+; CHECK-NEXT:    sltu a3, a1, a2
+; CHECK-NEXT:    addi a3, a3, -1
+; CHECK-NEXT:    li a4, 32
+; CHECK-NEXT:    and a2, a3, a2
+; CHECK-NEXT:    bltu a1, a4, .LBB49_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:  .LBB49_2:
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v25, a0
-; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vredxor.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    addi a0, a1, -32
-; CHECK-NEXT:    sltu a1, a1, a0
-; CHECK-NEXT:    addi a1, a1, -1
-; CHECK-NEXT:    and a0, a1, a0
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vredxor.vs v25, v16, v25, v0.t
 ; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vredxor.vs v8, v16, v8, v0.t
+; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
   %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
   ret i32 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index 7bcf37b1af3c8f..95b64cb662a614 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -1115,10 +1115,13 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
 ; CHECK-NEXT:    vmv.s.x v25, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25, v0.t
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vredmaxu.vs v25, v16, v25, v0.t
-; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    vredmaxu.vs v8, v16, v8, v0.t
+; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
   %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 %evl)
   ret i32 %r

From 6e83058138210ab1e219d04974a071b57b3196e1 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 29 Apr 2024 22:49:10 -0700
Subject: [PATCH 23/65] [RISCV] Use an assert insead of a
 if/else+llvm_unreachable. NFC

---
 llvm/lib/TargetParser/RISCVISAInfo.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 24b774314fea84..aec82a94cf97bc 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -667,11 +667,10 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
     // version since the we don't have clear version scheme for that on
     // ISA spec.
     for (const auto *Ext : RISCVGImplications) {
-      if (auto Version = findDefaultVersion(Ext)) {
-        // Postpone AddExtension until end of this function
-        SeenExtMap[Ext] = {Version->Major, Version->Minor};
-      } else
-        llvm_unreachable("Default extension version not found?");
+      auto Version = findDefaultVersion(Ext);
+      assert(Version && "Default extension version not found?");
+      // Postpone AddExtension until end of this function
+      SeenExtMap[Ext] = {Version->Major, Version->Minor};
     }
   } else {
     // Baseline is `i` or `e`

From fb21343473e33e9a886b42d2fe95d1cec1cd0030 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 13:42:26 +0800
Subject: [PATCH 24/65] [C++20] [Modules] Don't skip pragma diagnostic mappings

Close https://github.com/llvm/llvm-project/issues/75057

Previously, I thought the diagnostic mappings is not meaningful with
modules incorrectly. And this problem get revealed by another change
recently. So this patch tried to rever the previous "optimization"
partially.
---
 clang/lib/Serialization/GeneratePCH.cpp |  1 -
 clang/test/Modules/pr75057.cppm         | 62 +++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/pr75057.cppm

diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index 7b97b73f7bbd00..53dda5f9a38580 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -114,7 +114,6 @@ void CXX20ModulesGenerator::HandleTranslationUnit(ASTContext &Ctx) {
       getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
   HSOpts.ModulesSkipHeaderSearchPaths = true;
-  HSOpts.ModulesSkipPragmaDiagnosticMappings = true;
 
   PCHGenerator::HandleTranslationUnit(Ctx);
 
diff --git a/clang/test/Modules/pr75057.cppm b/clang/test/Modules/pr75057.cppm
new file mode 100644
index 00000000000000..374c324e9f495b
--- /dev/null
+++ b/clang/test/Modules/pr75057.cppm
@@ -0,0 +1,62 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// Treat the behavior of using headers as baseline.
+// RUN: %clang_cc1 -std=c++20 %t/use-header.cc -isystem %t -fsyntax-only -verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t -fmodule-file=a=%t/a.pcm -fsyntax-only -verify
+
+//--- sys.h
+#ifndef SYS_H
+#define SYS_H
+
+#pragma GCC system_header
+
+template <class C>
+struct [[deprecated]] iterator {};
+
+_Pragma("GCC diagnostic push")
+_Pragma("GCC diagnostic ignored \"-Wdeprecated\"")                         
+_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+
+template <class C>
+struct reverse_iterator 
+: public iterator<C> {};
+
+_Pragma("GCC diagnostic pop")
+
+template <class T>
+class C {
+public:
+    void i() {
+        reverse_iterator<T> i;
+    }
+};
+
+#endif
+
+//--- use-header.cc
+// expected-no-diagnostics
+// However, we see unexpected warnings
+#include <sys.h>
+
+void use() {
+    C<int>().i();
+}
+
+//--- a.cppm
+module;
+#include <sys.h>
+export module a;
+export using ::iterator;
+export using ::C;
+
+//--- use-module.cc
+// expected-no-diagnostics
+import a;
+
+void use() {
+    C<int>().i();
+}

From 940ef9687f5f19ce02b7fa5d2eb6225f458fbdd9 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp@bytedance.com>
Date: Tue, 30 Apr 2024 14:14:16 +0800
Subject: [PATCH 25/65] [RISCV] Remove hasSideEffects=1 for
 saturating/fault-only-first instructions

Marking them as `hasSideEffects=1` stops some optimizations.

According to `Target.td`:

> // Does the instruction have side effects that are not captured by any
> // operands of the instruction or other flags?
> bit hasSideEffects = ?;

It seems we don't need to set `hasSideEffects` for vleNff since we have
modelled `vl` as an output operand.

As for saturating instructions, I think that explicit Def/Use list
is kind of side effects captured by any operands of the instruction,
so we don't need to set `hasSideEffects` either. And I have just
investigated AArch64's implementation, they don't set this flag and
don't add `Def` list.

These changes make optimizations like `performCombineVMergeAndVOps`
and MachineCSE possible for these instructions.

As a consequence, `copyprop.mir` can't test what we want to test in
https://reviews.llvm.org/D155140, so we replace `vssra.vi` with a
VCIX instruction (it has side effects).

Reviewers: jacquesguan, topperc, preames, asb, lukel97

Reviewed By: topperc, lukel97

Pull Request: https://github.com/llvm/llvm-project/pull/90049
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       | 13 ++--
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  1 -
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    | 10 +--
 ...regalloc-last-chance-recoloring-failure.ll | 69 ++++++++-----------
 llvm/test/CodeGen/RISCV/rvv/commutable.ll     | 21 +++---
 llvm/test/CodeGen/RISCV/rvv/copyprop.mir      | 17 +++--
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     | 32 ++++-----
 7 files changed, 67 insertions(+), 96 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 1d58860a0afc8a..4c4e7351212f8a 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -661,7 +661,7 @@ let TargetPrefix = "riscv" in {
         : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                     [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
                      llvm_anyint_ty],
-                    [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+                    [IntrNoMem]>, RISCVVIntrinsic {
     let ScalarOperand = 2;
     let VLOperand = 3;
   }
@@ -684,7 +684,7 @@ let TargetPrefix = "riscv" in {
                     [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
                      LLVMMatchType<2>],
-                    [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+                    [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
     let ScalarOperand = 2;
     let VLOperand = 4;
   }
@@ -708,7 +708,7 @@ let TargetPrefix = "riscv" in {
         : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                     [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
                      llvm_anyint_ty, LLVMMatchType<2>],
-                    [ImmArg<ArgIndex<3>>, IntrNoMem, IntrHasSideEffects]>,
+                    [ImmArg<ArgIndex<3>>, IntrNoMem]>,
                     RISCVVIntrinsic {
     let VLOperand = 4;
   }
@@ -721,7 +721,7 @@ let TargetPrefix = "riscv" in {
                     [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
                      LLVMMatchType<2>, LLVMMatchType<2>],
-                    [ImmArg<ArgIndex<4>>,ImmArg<ArgIndex<6>>, IntrNoMem, IntrHasSideEffects]>,
+                    [ImmArg<ArgIndex<4>>,ImmArg<ArgIndex<6>>, IntrNoMem]>,
                     RISCVVIntrinsic {
     let VLOperand = 5;
   }
@@ -733,7 +733,7 @@ let TargetPrefix = "riscv" in {
         : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                     [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
                      llvm_anyint_ty, LLVMMatchType<3>],
-                    [ImmArg<ArgIndex<3>>, IntrNoMem, IntrHasSideEffects]>,
+                    [ImmArg<ArgIndex<3>>, IntrNoMem]>,
                     RISCVVIntrinsic {
     let VLOperand = 4;
   }
@@ -746,8 +746,7 @@ let TargetPrefix = "riscv" in {
                     [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
                      LLVMMatchType<3>, LLVMMatchType<3>],
-                    [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem,
-                     IntrHasSideEffects]>, RISCVVIntrinsic {
+                    [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>, RISCVVIntrinsic {
     let VLOperand = 5;
   }
   // Input: (vector_in, vector_in, scalar_in, vl, policy)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b0568297a470a7..da1543bd7112a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3668,7 +3668,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   }
 
   // Skip if True has side effect.
-  // TODO: Support vleff and vlsegff.
   if (TII->get(TrueOpc).hasUnmodeledSideEffects())
     return false;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index fc60a9cc7cd30e..22e54886178467 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6232,7 +6232,7 @@ defm PseudoVSUX : VPseudoIStore<Ordered=false>;
 //===----------------------------------------------------------------------===//
 
 // vleff may update VL register
-let hasSideEffects = 1, Defs = [VL] in
+let Defs = [VL] in
 defm PseudoVL : VPseudoFFLoad;
 
 //===----------------------------------------------------------------------===//
@@ -6248,7 +6248,7 @@ defm PseudoVSOXSEG : VPseudoISegStore<Ordered=true>;
 defm PseudoVSUXSEG : VPseudoISegStore<Ordered=false>;
 
 // vlseg<nf>e<eew>ff.v may update VL register
-let hasSideEffects = 1, Defs = [VL] in {
+let Defs = [VL] in {
 defm PseudoVLSEG : VPseudoUSSegLoadFF;
 }
 
@@ -6450,7 +6450,7 @@ defm PseudoVMV_V : VPseudoUnaryVMV_V_X_I;
 //===----------------------------------------------------------------------===//
 // 12.1. Vector Single-Width Saturating Add and Subtract
 //===----------------------------------------------------------------------===//
-let Defs = [VXSAT], hasSideEffects = 1 in {
+let Defs = [VXSAT] in {
   defm PseudoVSADDU : VPseudoVSALU_VV_VX_VI<Commutable=1>;
   defm PseudoVSADD  : VPseudoVSALU_VV_VX_VI<Commutable=1>;
   defm PseudoVSSUBU : VPseudoVSALU_VV_VX;
@@ -6468,7 +6468,7 @@ defm PseudoVASUB  : VPseudoVAALU_VV_VX_RM;
 //===----------------------------------------------------------------------===//
 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
 //===----------------------------------------------------------------------===//
-let Defs = [VXSAT], hasSideEffects = 1 in {
+let Defs = [VXSAT] in {
   defm PseudoVSMUL : VPseudoVSMUL_VV_VX_RM;
 }
 
@@ -6481,7 +6481,7 @@ defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI_RM<uimm5>;
 //===----------------------------------------------------------------------===//
 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
 //===----------------------------------------------------------------------===//
-let Defs = [VXSAT], hasSideEffects = 1 in {
+let Defs = [VXSAT] in {
   defm PseudoVNCLIP  : VPseudoVNCLP_WV_WX_WI_RM;
   defm PseudoVNCLIPU : VPseudoVNCLP_WV_WX_WI_RM;
 }
diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
index 3ce56318426ad2..81ef6072449e80 100644
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -26,7 +26,10 @@ define void @last_chance_recoloring_failure() {
 ; CHECK-NEXT:    li a0, 55
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vloxseg2ei32.v v16, (a0), v8
-; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 2
 ; CHECK-NEXT:    vs4r.v v16, (a0) # Unknown-size Folded Spill
@@ -37,37 +40,24 @@ define void @last_chance_recoloring_failure() {
 ; CHECK-NEXT:    li s0, 36
 ; CHECK-NEXT:    vsetvli zero, s0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwadd.vv v16, v8, v12, v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    call func
-; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vrgather.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vsetvli zero, s0, e16, m4, ta, ma
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 2
-; CHECK-NEXT:    vl4r.v v20, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 3
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    addi a1, a1, 16
-; CHECK-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfwsub.wv v8, v0, v20
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vssubu.vv v16, v16, v8, v0.t
-; CHECK-NEXT:    vsetvli zero, s0, e32, m8, tu, mu
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vfdiv.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    vl4r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vl4r.v v20, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfwsub.wv v8, v24, v16
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, mu
+; CHECK-NEXT:    vfdiv.vv v8, v24, v8, v0.t
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 4
@@ -109,25 +99,20 @@ define void @last_chance_recoloring_failure() {
 ; SUBREGLIVENESS-NEXT:    addi a0, sp, 16
 ; SUBREGLIVENESS-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; SUBREGLIVENESS-NEXT:    call func
-; SUBREGLIVENESS-NEXT:    li a0, 32
-; SUBREGLIVENESS-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; SUBREGLIVENESS-NEXT:    vrgather.vv v16, v8, v12, v0.t
 ; SUBREGLIVENESS-NEXT:    vsetvli zero, s0, e16, m4, ta, ma
+; SUBREGLIVENESS-NEXT:    csrr a0, vlenb
+; SUBREGLIVENESS-NEXT:    slli a0, a0, 3
+; SUBREGLIVENESS-NEXT:    add a0, sp, a0
+; SUBREGLIVENESS-NEXT:    addi a0, a0, 16
 ; SUBREGLIVENESS-NEXT:    csrr a1, vlenb
-; SUBREGLIVENESS-NEXT:    slli a1, a1, 3
-; SUBREGLIVENESS-NEXT:    add a1, sp, a1
-; SUBREGLIVENESS-NEXT:    addi a1, a1, 16
-; SUBREGLIVENESS-NEXT:    csrr a2, vlenb
-; SUBREGLIVENESS-NEXT:    slli a2, a2, 2
-; SUBREGLIVENESS-NEXT:    vl4r.v v20, (a1) # Unknown-size Folded Reload
-; SUBREGLIVENESS-NEXT:    add a1, a1, a2
-; SUBREGLIVENESS-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
-; SUBREGLIVENESS-NEXT:    addi a1, sp, 16
-; SUBREGLIVENESS-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; SUBREGLIVENESS-NEXT:    vfwsub.wv v8, v24, v20
-; SUBREGLIVENESS-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; SUBREGLIVENESS-NEXT:    vssubu.vv v16, v16, v8, v0.t
-; SUBREGLIVENESS-NEXT:    vsetvli zero, s0, e32, m8, tu, mu
+; SUBREGLIVENESS-NEXT:    slli a1, a1, 2
+; SUBREGLIVENESS-NEXT:    vl4r.v v16, (a0) # Unknown-size Folded Reload
+; SUBREGLIVENESS-NEXT:    add a0, a0, a1
+; SUBREGLIVENESS-NEXT:    vl4r.v v20, (a0) # Unknown-size Folded Reload
+; SUBREGLIVENESS-NEXT:    addi a0, sp, 16
+; SUBREGLIVENESS-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; SUBREGLIVENESS-NEXT:    vfwsub.wv v8, v24, v16
+; SUBREGLIVENESS-NEXT:    vsetvli zero, zero, e32, m8, tu, mu
 ; SUBREGLIVENESS-NEXT:    vfdiv.vv v8, v24, v8, v0.t
 ; SUBREGLIVENESS-NEXT:    vse32.v v8, (a0)
 ; SUBREGLIVENESS-NEXT:    csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll
index 06a6327d3892b6..d94b529bac0175 100644
--- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll
@@ -655,10 +655,9 @@ define <vscale x 1 x i64> @commutable_vsadd_vv(<vscale x 1 x i64> %0, <vscale x
 ; CHECK-LABEL: commutable_vsadd_vv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vsadd.vv v10, v8, v9
-; CHECK-NEXT:    vsadd.vv v8, v9, v8
+; CHECK-NEXT:    vsadd.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsadd.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2)
@@ -673,7 +672,7 @@ define <vscale x 1 x i64> @commutable_vsadd_vv_masked(<vscale x 1 x i64> %0, <vs
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vsadd.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vsadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsadd.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v10, v8
 ; CHECK-NEXT:    ret
@@ -689,10 +688,9 @@ define <vscale x 1 x i64> @commutable_vsaddu_vv(<vscale x 1 x i64> %0, <vscale x
 ; CHECK-LABEL: commutable_vsaddu_vv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vsaddu.vv v10, v8, v9
-; CHECK-NEXT:    vsaddu.vv v8, v9, v8
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsaddu.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2)
@@ -707,7 +705,7 @@ define <vscale x 1 x i64> @commutable_vsaddu_vv_masked(<vscale x 1 x i64> %0, <v
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vsaddu.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vsaddu.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsaddu.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v10, v8
 ; CHECK-NEXT:    ret
@@ -794,10 +792,9 @@ define <vscale x 1 x i64> @commutable_vsmul_vv(<vscale x 1 x i64> %0, <vscale x
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    csrwi vxrm, 0
-; CHECK-NEXT:    vsmul.vv v10, v8, v9
-; CHECK-NEXT:    vsmul.vv v8, v9, v8
+; CHECK-NEXT:    vsmul.vv v8, v8, v9
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen 0, iXLen %2)
@@ -813,7 +810,7 @@ define <vscale x 1 x i64> @commutable_vsmul_vv_masked(<vscale x 1 x i64> %0, <vs
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    csrwi vxrm, 0
 ; CHECK-NEXT:    vsmul.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vsmul.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vsmul.vv v8, v8, v9, v0.t
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v10, v8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
index eb4c8bfdd67f9a..95c227518f5c4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -start-after=finalize-isel | FileCheck %s
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v,+xsfvcp -start-after=finalize-isel | FileCheck %s
 
 --- |
   define void @foo() {
@@ -7,23 +7,22 @@
   ; CHECK:       # %bb.0: # %entry
   ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
   ; CHECK-NEXT:    vmsne.vi v0, v8, 0
-  ; CHECK-NEXT:    vsll.vi v9, v8, 5
-  ; CHECK-NEXT:    vmerge.vim v9, v9, -1, v0
-  ; CHECK-NEXT:    csrwi vxrm, 0
-  ; CHECK-NEXT:    vssra.vi v8, v8, 2
+  ; CHECK-NEXT:    vsll.vi v8, v8, 5
+  ; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+  ; CHECK-NEXT:    sf.vc.v.x 3, 31, v9, a1
   ; CHECK-NEXT:    bgeu a0, zero, .LBB0_3
   ; CHECK-NEXT:  # %bb.1: # %entry
   ; CHECK-NEXT:    li a2, 128
   ; CHECK-NEXT:    bltu a0, a2, .LBB0_4
   ; CHECK-NEXT:  .LBB0_2: # %entry
-  ; CHECK-NEXT:    vse64.v v8, (a1)
+  ; CHECK-NEXT:    vse64.v v9, (a1)
   ; CHECK-NEXT:    ret
   ; CHECK-NEXT:  .LBB0_3:
-  ; CHECK-NEXT:    vmv.v.i v8, 0
+  ; CHECK-NEXT:    vmv.v.i v9, 0
   ; CHECK-NEXT:    li a2, 128
   ; CHECK-NEXT:    bgeu a0, a2, .LBB0_2
   ; CHECK-NEXT:  .LBB0_4: # %entry
-  ; CHECK-NEXT:    vse64.v v9, (a1)
+  ; CHECK-NEXT:    vse64.v v8, (a1)
   ; CHECK-NEXT:    ret
   entry:
     ret void
@@ -51,7 +50,7 @@ body:             |
     %26:vrnov0 = IMPLICIT_DEF
     %25:vrnov0 = PseudoVMERGE_VIM_M1 %26, %17, -1, $v0, 1, 6 /* e64 */
     %pt8:vr = IMPLICIT_DEF
-    %29:vr = PseudoVSSRA_VI_M1 %pt8, %3, 2, 0, 1, 6 /* e64 */, 0
+    %29:vr = PseudoVC_V_X_SE_M1 3, 31, %2, 1, 6 /* e64 */, implicit-def dead $vcix_state, implicit $vcix_state
     %pt9:vr = IMPLICIT_DEF
     %30:vr = PseudoVMV_V_I_M1 %pt9, 0, 1, 6 /* e64 */, 0
     BGEU %1, $x0, %bb.2
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 970581b4d80a9a..64b3a6f2b4b3f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -194,15 +194,12 @@ define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
   ret void
 }
 
-; FIXME: Merge vmerge.vvm and vleffN.v
 declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64)
 define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpmerge_vleff:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vle32ff.v v9, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
@@ -634,14 +631,11 @@ define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
   ret void
 }
 
-; FIXME: select vselect.vvm and vleffN.v
 define <vscale x 2 x i32> @vpselect_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_vleff:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vle32ff.v v9, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1)
@@ -898,22 +892,20 @@ define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale
 define void @test_dag_loop() {
 ; CHECK-LABEL: test_dag_loop:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (zero)
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vsetivli zero, 0, e8, m4, tu, mu
-; CHECK-NEXT:    vmv4r.v v20, v16
-; CHECK-NEXT:    vssubu.vx v20, v16, zero, v0.t
+; CHECK-NEXT:    vmv4r.v v12, v8
+; CHECK-NEXT:    vssubu.vx v12, v8, zero, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmseq.vv v0, v20, v16
+; CHECK-NEXT:    vmseq.vv v0, v12, v8
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vmv.v.i v16, 0
-; CHECK-NEXT:    vsetivli zero, 1, e16, m8, tu, ma
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vsetivli zero, 1, e16, m8, tu, mu
+; CHECK-NEXT:    vle16.v v8, (zero), v0.t
 ; CHECK-NEXT:    vsetivli zero, 0, e16, m8, ta, ma
-; CHECK-NEXT:    vse16.v v16, (zero)
+; CHECK-NEXT:    vse16.v v8, (zero)
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, ptr null, i64 1)

From 6b961e2abfffd8b5a508b5958849b13b0feafa50 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 14:26:52 +0800
Subject: [PATCH 26/65] Revert "[C++20] [Modules] Don't skip pragma diagnostic
 mappings" and "[NFC] [C++20] [Modules] Use new class CXX20ModulesGenerator to
 generate module file for C++20 modules instead of PCHGenerator"

This reverts commit fb21343473e33e9a886b42d2fe95d1cec1cd0030.
and commit 18268ac0f48d93c2bcddb69732761971669c09ab.

It looks like there are some problems about linking the compiler
---
 clang/include/clang/Serialization/ASTWriter.h | 19 +-----
 clang/lib/Frontend/FrontendActions.cpp        | 12 ++--
 clang/lib/Serialization/GeneratePCH.cpp       | 20 +++---
 clang/test/Modules/pr67893.cppm               |  2 +-
 clang/test/Modules/pr75057.cppm               | 62 -------------------
 clang/test/Modules/search-partitions.cpp      |  8 ++-
 6 files changed, 29 insertions(+), 94 deletions(-)
 delete mode 100644 clang/test/Modules/pr75057.cppm

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 4e433deaaf2dbc..6c45b7348b8552 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -928,30 +928,17 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class CXX20ModulesGenerator : public PCHGenerator {
+class ReducedBMIGenerator : public PCHGenerator {
 protected:
   virtual Module *getEmittingModule(ASTContext &Ctx) override;
 
-  CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
-                        StringRef OutputFile, bool GeneratingReducedBMI);
-
 public:
-  CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
-                        StringRef OutputFile)
-      : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
-                              /*GeneratingReducedBMI=*/false) {}
+  ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                      StringRef OutputFile);
 
   void HandleTranslationUnit(ASTContext &Ctx) override;
 };
 
-class ReducedBMIGenerator : public CXX20ModulesGenerator {
-public:
-  ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
-                      StringRef OutputFile)
-      : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
-                              /*GeneratingReducedBMI=*/true) {}
-};
-
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 454653a31534cd..04eb1041326713 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,10 +272,14 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr<ASTConsumer>
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) {
-  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
-  Consumers.push_back(std::make_unique<CXX20ModulesGenerator>(
-      CI.getPreprocessor(), CI.getModuleCache(),
-      CI.getFrontendOpts().OutputFile));
+  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
+  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
+  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
+
+  std::vector<std::unique_ptr<ASTConsumer>> Consumers =
+      CreateMultiplexConsumer(CI, InFile);
+  if (Consumers.empty())
+    return nullptr;
 
   if (CI.getFrontendOpts().GenReducedBMI &&
       !CI.getFrontendOpts().ModuleOutputPath.empty()) {
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index 53dda5f9a38580..bed74399098d7f 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,32 +88,36 @@ ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() {
   return &Writer;
 }
 
-CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor &PP,
-                                             InMemoryModuleCache &ModuleCache,
-                                             StringRef OutputFile,
-                                             bool GeneratingReducedBMI)
+ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor &PP,
+                                         InMemoryModuleCache &ModuleCache,
+                                         StringRef OutputFile)
     : PCHGenerator(
           PP, ModuleCache, OutputFile, llvm::StringRef(),
           std::make_shared<PCHBuffer>(),
           /*Extensions=*/ArrayRef<std::shared_ptr<ModuleFileExtension>>(),
           /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/false,
           /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false,
-          GeneratingReducedBMI) {}
+          /*GeneratingReducedBMI=*/true) {}
 
-Module *CXX20ModulesGenerator::getEmittingModule(ASTContext &Ctx) {
+Module *ReducedBMIGenerator::getEmittingModule(ASTContext &Ctx) {
   Module *M = Ctx.getCurrentNamedModule();
   assert(M && M->isNamedModuleUnit() &&
-         "CXX20ModulesGenerator should only be used with C++20 Named modules.");
+         "ReducedBMIGenerator should only be used with C++20 Named modules.");
   return M;
 }
 
-void CXX20ModulesGenerator::HandleTranslationUnit(ASTContext &Ctx) {
+void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) {
+  // We need to do this to make sure the size of reduced BMI not to be larger
+  // than full BMI.
+  //
   // FIMXE: We'd better to wrap such options to a new class ASTWriterOptions
   // since this is not about searching header really.
+  // FIXME2: We'd better to move the class writing full BMI with reduced BMI.
   HeaderSearchOptions &HSOpts =
       getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
   HSOpts.ModulesSkipHeaderSearchPaths = true;
+  HSOpts.ModulesSkipPragmaDiagnosticMappings = true;
 
   PCHGenerator::HandleTranslationUnit(Ctx);
 
diff --git a/clang/test/Modules/pr67893.cppm b/clang/test/Modules/pr67893.cppm
index 95479193f8ea2b..58990cec01d666 100644
--- a/clang/test/Modules/pr67893.cppm
+++ b/clang/test/Modules/pr67893.cppm
@@ -5,7 +5,7 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \
 // RUN:      -emit-module-interface -o %t/a.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \
-// RUN:      -emit-module-interface -fprebuilt-module-path=%t -o %t/m.pcm
+// RUN:      -emit-module-interface -fprebuilt-module-path=%t
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm  \
 // RUN:      -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm
 
diff --git a/clang/test/Modules/pr75057.cppm b/clang/test/Modules/pr75057.cppm
deleted file mode 100644
index 374c324e9f495b..00000000000000
--- a/clang/test/Modules/pr75057.cppm
+++ /dev/null
@@ -1,62 +0,0 @@
-// RUN: rm -rf %t
-// RUN: mkdir -p %t
-// RUN: split-file %s %t
-//
-// Treat the behavior of using headers as baseline.
-// RUN: %clang_cc1 -std=c++20 %t/use-header.cc -isystem %t -fsyntax-only -verify
-//
-// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t -emit-module-interface -o %t/a.pcm
-// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t -fmodule-file=a=%t/a.pcm -fsyntax-only -verify
-
-//--- sys.h
-#ifndef SYS_H
-#define SYS_H
-
-#pragma GCC system_header
-
-template <class C>
-struct [[deprecated]] iterator {};
-
-_Pragma("GCC diagnostic push")
-_Pragma("GCC diagnostic ignored \"-Wdeprecated\"")                         
-_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
-
-template <class C>
-struct reverse_iterator 
-: public iterator<C> {};
-
-_Pragma("GCC diagnostic pop")
-
-template <class T>
-class C {
-public:
-    void i() {
-        reverse_iterator<T> i;
-    }
-};
-
-#endif
-
-//--- use-header.cc
-// expected-no-diagnostics
-// However, we see unexpected warnings
-#include <sys.h>
-
-void use() {
-    C<int>().i();
-}
-
-//--- a.cppm
-module;
-#include <sys.h>
-export module a;
-export using ::iterator;
-export using ::C;
-
-//--- use-module.cc
-// expected-no-diagnostics
-import a;
-
-void use() {
-    C<int>().i();
-}
diff --git a/clang/test/Modules/search-partitions.cpp b/clang/test/Modules/search-partitions.cpp
index 92f7c637c83384..92732958db94e6 100644
--- a/clang/test/Modules/search-partitions.cpp
+++ b/clang/test/Modules/search-partitions.cpp
@@ -11,7 +11,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/partition3.cpp \
 // RUN:  -o %t/A-Part3.pcm
 
-// RUN: %clang_cc1 -std=c++20 %t/moduleA.cpp -fsyntax-only -verify \
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/moduleA.cpp \
 // RUN:  -fprebuilt-module-path=%t
 
 // Test again with reduced BMI
@@ -28,7 +28,9 @@
 // RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition3.cpp \
 // RUN:  -o %t/A-Part3.pcm
 
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %t/moduleA.cpp -fprebuilt-module-path=%t 
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/moduleA.cpp -fprebuilt-module-path=%t 
+
+// expected-no-diagnostics
 
 //--- partition1.cpp
 export module A:Part1;
@@ -48,7 +50,7 @@ export module A:Part3;
 int part3();
 
 //--- moduleA.cpp
-// expected-no-diagnostics
+
 export module A;
 
 import :Part1;

From 2524146b256002bfc70b38008425291f5b3dd08c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 29 Apr 2024 23:35:30 -0700
Subject: [PATCH 27/65] [RISCV] Add DAG combine for (vmv_s_x_vl (undef)
 (vmv_x_s X). (#90524)

We can use the original vector as long as the type of X matches the
result type of the vmv_s_x_vl.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  4 +++
 .../rvv/fixed-vector-i8-index-cornercase.ll   | 17 ++++--------
 .../rvv/fixed-vectors-reduction-int-vp.ll     | 26 +++++++++----------
 .../CodeGen/RISCV/rvv/vreductions-int-vp.ll   |  7 ++---
 4 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 68f4ec5ef49f31..fe8edcf39681db 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16791,6 +16791,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     SDValue Scalar = N->getOperand(1);
     SDValue VL = N->getOperand(2);
 
+    if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
+        Scalar.getOperand(0).getValueType() == N->getValueType(0))
+      return Scalar.getOperand(0);
+
     // Use M1 or smaller to avoid over constraining register allocation
     const MVT M1VT = getLMUL1VT(VT);
     if (M1VT.bitsLT(VT)) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
index 2874db6debd740..875f4f239028b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
@@ -26,17 +26,13 @@ define <512 x i8> @single_source(<512 x i8> %a) {
 ; CHECK-NEXT:    vmv.v.x v8, a1
 ; CHECK-NEXT:    vslide1down.vx v8, v8, a0
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v17, v16, 5
-; CHECK-NEXT:    vmv.x.s a0, v17
-; CHECK-NEXT:    vmv.s.x v24, a0
+; CHECK-NEXT:    vslidedown.vi v24, v16, 5
 ; CHECK-NEXT:    li a0, 432
 ; CHECK-NEXT:    li a1, 431
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v24, a1
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v16, v16, 4
-; CHECK-NEXT:    vmv.x.s a0, v16
-; CHECK-NEXT:    vmv.s.x v16, a0
 ; CHECK-NEXT:    li a0, 466
 ; CHECK-NEXT:    li a1, 465
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, ma
@@ -109,20 +105,17 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
 ; CHECK-NEXT:    addi a1, sp, 512
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT:    vse8.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v24, 5
 ; CHECK-NEXT:    vmv.x.s a1, v24
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v25, v24, 5
-; CHECK-NEXT:    vmv.x.s a1, v25
-; CHECK-NEXT:    vmv.s.x v0, a1
 ; CHECK-NEXT:    li a1, 432
 ; CHECK-NEXT:    li a2, 431
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v0, a2
 ; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v24, v24, 4
-; CHECK-NEXT:    vmv.x.s a1, v24
-; CHECK-NEXT:    vmv.s.x v24, a1
 ; CHECK-NEXT:    li a1, 466
 ; CHECK-NEXT:    li a2, 465
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
@@ -130,9 +123,9 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
 ; CHECK-NEXT:    vslideup.vx v8, v24, a2
 ; CHECK-NEXT:    vmv.s.x v24, a1
 ; CHECK-NEXT:    li a1, 478
+; CHECK-NEXT:    li a2, 477
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
 ; CHECK-NEXT:    lbu a1, 1012(sp)
-; CHECK-NEXT:    li a2, 477
 ; CHECK-NEXT:    vslideup.vx v8, v24, a2
 ; CHECK-NEXT:    vmv.s.x v24, a1
 ; CHECK-NEXT:    li a1, 501
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index b874a4477f5d17..02a989a9699606 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -802,27 +802,25 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1>
 ; CHECK-LABEL: vpreduce_xor_v64i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT:    li a3, 32
 ; CHECK-NEXT:    vslidedown.vi v24, v0, 4
-; CHECK-NEXT:    addi a2, a1, -32
-; CHECK-NEXT:    sltu a3, a1, a2
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    li a4, 32
-; CHECK-NEXT:    and a2, a3, a2
-; CHECK-NEXT:    bltu a1, a4, .LBB49_2
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    bltu a1, a3, .LBB49_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:  .LBB49_2:
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v25, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vredxor.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vmv.x.s a0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vredxor.vs v25, v8, v25, v0.t
+; CHECK-NEXT:    addi a0, a1, -32
+; CHECK-NEXT:    sltu a1, a1, a0
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vredxor.vs v8, v16, v8, v0.t
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vredxor.vs v25, v16, v25, v0.t
+; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
   ret i32 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index 95b64cb662a614..7bcf37b1af3c8f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -1115,13 +1115,10 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
 ; CHECK-NEXT:    vmv.s.x v25, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vredmaxu.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vmv.x.s a0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e32, m8, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vredmaxu.vs v8, v16, v8, v0.t
-; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    vredmaxu.vs v25, v16, v25, v0.t
+; CHECK-NEXT:    vmv.x.s a0, v25
 ; CHECK-NEXT:    ret
   %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 %evl)
   ret i32 %r

From 4a84d8e4c28d873eacfce53f9fd902d67a08a859 Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Tue, 30 Apr 2024 14:38:30 +0800
Subject: [PATCH 28/65] [LoongArch] Support parsing la.tls.desc pseudo
 instruction

Simultaneously implemented parsing support for the `%desc_*` modifiers.

Reviewers: SixWeining, heiher, xen0n

Reviewed By: xen0n, SixWeining

Pull Request: https://github.com/llvm/llvm-project/pull/90158
---
 .../AsmParser/LoongArchAsmParser.cpp          | 170 +++++++++++++++++-
 .../Target/LoongArch/LoongArchInstrInfo.td    |  21 +++
 .../MCTargetDesc/LoongArchFixupKinds.h        |  22 +++
 .../MCTargetDesc/LoongArchMCCodeEmitter.cpp   |  30 ++++
 .../MCTargetDesc/LoongArchMCExpr.cpp          |  32 ++++
 .../LoongArch/MCTargetDesc/LoongArchMCExpr.h  |  10 ++
 llvm/test/MC/LoongArch/Macros/macros-la-bad.s |   3 +
 llvm/test/MC/LoongArch/Macros/macros-la.s     |  45 +++++
 llvm/test/MC/LoongArch/Misc/tls-symbols.s     |  22 +++
 .../MC/LoongArch/Relocations/relocations.s    |  50 ++++++
 10 files changed, 398 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 20284b18428bd8..73cb80245bca4d 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -118,6 +118,13 @@ class LoongArchAsmParser : public MCTargetAsmParser {
   // Helper to emit pseudo instruction "la.tls.gd $rd, $rj, sym".
   void emitLoadAddressTLSGDLarge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
 
+  // Helper to emit pseudo instruction "la.tls.desc $rd, sym".
+  void emitLoadAddressTLSDescAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+  void emitLoadAddressTLSDescPcrel(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+  // Helper to emit pseudo instruction "la.tls.desc $rd, $rj, sym".
+  void emitLoadAddressTLSDescPcrelLarge(MCInst &Inst, SMLoc IDLoc,
+                                        MCStreamer &Out);
+
   // Helper to emit pseudo instruction "li.w/d $rd, $imm".
   void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
 
@@ -132,6 +139,7 @@ class LoongArchAsmParser : public MCTargetAsmParser {
     Match_RequiresOpnd2NotR0R1,
     Match_RequiresAMORdDifferRkRj,
     Match_RequiresLAORdDifferRj,
+    Match_RequiresLAORdR4,
 #define GET_OPERAND_DIAGNOSTIC_TYPES
 #include "LoongArchGenAsmMatcher.inc"
 #undef GET_OPERAND_DIAGNOSTIC_TYPES
@@ -267,7 +275,9 @@ class LoongArchOperand : public MCParsedAsmOperand {
     bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None ||
                        VK == LoongArchMCExpr::VK_LoongArch_PCALA_LO12 ||
                        VK == LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12;
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_LO12 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD;
     return IsConstantImm
                ? isInt<12>(Imm) && IsValidKind
                : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
@@ -288,7 +298,9 @@ class LoongArchOperand : public MCParsedAsmOperand {
                        VK == LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_HI12 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12;
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC64_HI12 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_HI12;
     return IsConstantImm
                ? isInt<12>(Imm) && IsValidKind
                : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
@@ -311,7 +323,8 @@ class LoongArchOperand : public MCParsedAsmOperand {
                        VK == LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_LO12 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12;
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_LO12;
     return IsConstantImm
                ? isUInt<12>(Imm) && IsValidKind
                : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
@@ -334,7 +347,8 @@ class LoongArchOperand : public MCParsedAsmOperand {
     bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
     bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None ||
                        VK == LoongArchMCExpr::VK_LoongArch_B16 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_PCALA_LO12;
+                       VK == LoongArchMCExpr::VK_LoongArch_PCALA_LO12 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL;
     return IsConstantImm
                ? isShiftedInt<16, 2>(Imm) && IsValidKind
                : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
@@ -355,7 +369,8 @@ class LoongArchOperand : public MCParsedAsmOperand {
                        VK == LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20;
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_HI20;
     return IsConstantImm
                ? isInt<20>(Imm) && IsValidKind
                : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
@@ -375,7 +390,8 @@ class LoongArchOperand : public MCParsedAsmOperand {
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_LD_HI20 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_IE_HI20 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20;
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_HI20;
     return IsConstantImm
                ? isInt<20>(Imm) && IsValidKind
                : LoongArchAsmParser::classifySymbolRef(getImm(), VK) &&
@@ -396,7 +412,9 @@ class LoongArchOperand : public MCParsedAsmOperand {
                        VK == LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_LO20 ||
                        VK == LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20 ||
-                       VK == LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20;
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_LO20 ||
+                       VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC64_LO20;
 
     return IsConstantImm
                ? isInt<20>(Imm) && IsValidKind
@@ -801,6 +819,13 @@ void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
             MCInstBuilder(Opc).addReg(DestReg).addReg(DestReg).addImm(0),
             getSTI());
         continue;
+      } else if (VK == LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD) {
+        Out.emitInstruction(MCInstBuilder(Opc)
+                                .addReg(LoongArch::R1)
+                                .addReg(DestReg)
+                                .addExpr(LE),
+                            getSTI());
+        continue;
       }
       Out.emitInstruction(
           MCInstBuilder(Opc).addReg(DestReg).addReg(DestReg).addExpr(LE),
@@ -833,6 +858,13 @@ void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
           MCInstBuilder(Opc).addReg(DestReg).addReg(DestReg).addReg(TmpReg),
           getSTI());
       break;
+    case LoongArch::JIRL:
+      Out.emitInstruction(MCInstBuilder(Opc)
+                              .addReg(LoongArch::R1)
+                              .addReg(LoongArch::R1)
+                              .addExpr(LE),
+                          getSTI());
+      break;
     }
   }
 }
@@ -1116,6 +1148,109 @@ void LoongArchAsmParser::emitLoadAddressTLSGDLarge(MCInst &Inst, SMLoc IDLoc,
   emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out);
 }
 
+void LoongArchAsmParser::emitLoadAddressTLSDescAbs(MCInst &Inst, SMLoc IDLoc,
+                                                   MCStreamer &Out) {
+  // `la.tls.desc $rd, sym` with `la-global-with-abs` feature
+  // for la32 expands to:
+  //   lu12i.w $rd, %desc_hi20(sym)
+  //   ori     $rd, $rd, %desc_lo12(sym)
+  //   ld.w    $ra, $rd, %desc_ld(sym)
+  //   jirl    $ra, $ra, %desc_call(sym)
+  //
+  // for la64 expands to:
+  //   lu12i.w $rd, %desc_hi20(sym)
+  //   ori     $rd, $rd, %desc_lo12(sym)
+  //   lu32i.d $rd, %desc64_lo20(sym)
+  //   lu52i.d $rd, $rd, %desc64_hi12(sym)
+  //   ld.d    $ra, $rd, %desc_ld(sym)
+  //   jirl    $ra, $ra, %desc_call(sym)
+  MCRegister DestReg = Inst.getOperand(0).getReg();
+  const MCExpr *Symbol = Inst.getOpcode() == LoongArch::PseudoLA_TLS_DESC_ABS
+                             ? Inst.getOperand(1).getExpr()
+                             : Inst.getOperand(2).getExpr();
+  unsigned LD = is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
+  InstSeq Insts;
+
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::LU12I_W, LoongArchMCExpr::VK_LoongArch_TLS_DESC_HI20));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::ORI, LoongArchMCExpr::VK_LoongArch_TLS_DESC_LO12));
+
+  if (is64Bit()) {
+    Insts.push_back(LoongArchAsmParser::Inst(
+        LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_TLS_DESC64_LO20));
+    Insts.push_back(LoongArchAsmParser::Inst(
+        LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_TLS_DESC64_HI12));
+  }
+
+  Insts.push_back(
+      LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::JIRL, LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL));
+
+  emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+}
+
+void LoongArchAsmParser::emitLoadAddressTLSDescPcrel(MCInst &Inst, SMLoc IDLoc,
+                                                     MCStreamer &Out) {
+  // la.tls.desc $rd, sym
+  // expands to:
+  //   pcalau12i $rd, %desc_pc_hi20(sym)
+  //   addi.w/d  $rd, $rd, %desc_pc_lo12(sym)
+  //   ld.w/d    $ra, $rd, %desc_ld(sym)
+  //   jirl      $ra, $ra, %desc_call(sym)
+  MCRegister DestReg = Inst.getOperand(0).getReg();
+  const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+  unsigned ADDI = is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+  unsigned LD = is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
+  InstSeq Insts;
+
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_HI20));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      ADDI, LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_LO12));
+  Insts.push_back(
+      LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::JIRL, LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL));
+
+  emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+}
+
+void LoongArchAsmParser::emitLoadAddressTLSDescPcrelLarge(MCInst &Inst,
+                                                          SMLoc IDLoc,
+                                                          MCStreamer &Out) {
+  // la.tls.desc $rd, $rj, sym
+  // expands to:
+  //   pcalau12i $rd, %desc_pc_hi20(sym)
+  //   addi.d    $rj, $r0, %desc_pc_lo12(sym)
+  //   lu32i.d   $rj, %desc64_pc_lo20(sym)
+  //   lu52i.d   $rj, $rj, %desc64_pc_hi12(sym)
+  //   add.d     $rd, $rd, $rj
+  //   ld.w/d    $ra, $rd, %desc_ld(sym)
+  //   jirl      $ra, $ra, %desc_call(sym)
+  MCRegister DestReg = Inst.getOperand(0).getReg();
+  MCRegister TmpReg = Inst.getOperand(1).getReg();
+  const MCExpr *Symbol = Inst.getOperand(2).getExpr();
+  InstSeq Insts;
+
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::PCALAU12I, LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_HI20));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::ADDI_D, LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_LO12));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::LU32I_D, LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_LO20));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::LU52I_D, LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_HI12));
+  Insts.push_back(LoongArchAsmParser::Inst(LoongArch::ADD_D));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::LD_D, LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD));
+  Insts.push_back(LoongArchAsmParser::Inst(
+      LoongArch::JIRL, LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL));
+
+  emitLAInstSeq(DestReg, TmpReg, Symbol, Insts, IDLoc, Out);
+}
+
 void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc,
                                      MCStreamer &Out) {
   MCRegister DestReg = Inst.getOperand(0).getReg();
@@ -1211,6 +1346,16 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
   case LoongArch::PseudoLA_TLS_GD_LARGE:
     emitLoadAddressTLSGDLarge(Inst, IDLoc, Out);
     return false;
+  case LoongArch::PseudoLA_TLS_DESC_ABS:
+  case LoongArch::PseudoLA_TLS_DESC_ABS_LARGE:
+    emitLoadAddressTLSDescAbs(Inst, IDLoc, Out);
+    return false;
+  case LoongArch::PseudoLA_TLS_DESC_PC:
+    emitLoadAddressTLSDescPcrel(Inst, IDLoc, Out);
+    return false;
+  case LoongArch::PseudoLA_TLS_DESC_PC_LARGE:
+    emitLoadAddressTLSDescPcrelLarge(Inst, IDLoc, Out);
+    return false;
   case LoongArch::PseudoLI_W:
   case LoongArch::PseudoLI_D:
     emitLoadImm(Inst, IDLoc, Out);
@@ -1238,6 +1383,15 @@ unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
         return Match_RequiresAMORdDifferRkRj;
     }
     break;
+  case LoongArch::PseudoLA_TLS_DESC_ABS:
+  case LoongArch::PseudoLA_TLS_DESC_ABS_LARGE:
+  case LoongArch::PseudoLA_TLS_DESC_PC:
+  case LoongArch::PseudoLA_TLS_DESC_PC_LARGE: {
+    unsigned Rd = Inst.getOperand(0).getReg();
+    if (Rd != LoongArch::R4)
+      return Match_RequiresLAORdR4;
+    break;
+  }
   case LoongArch::PseudoLA_PCREL_LARGE:
   case LoongArch::PseudoLA_GOT_LARGE:
   case LoongArch::PseudoLA_TLS_IE_LARGE:
@@ -1376,6 +1530,8 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                  "$rd must be different from both $rk and $rj");
   case Match_RequiresLAORdDifferRj:
     return Error(Operands[1]->getStartLoc(), "$rd must be different from $rj");
+  case Match_RequiresLAORdR4:
+    return Error(Operands[1]->getStartLoc(), "$rd must be $r4");
   case Match_InvalidUImm1:
     return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
                                       /*Upper=*/(1 << 1) - 1);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 80429bc45be14c..958803b52d4ec8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1607,6 +1607,27 @@ def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst),
 } // Defs = [R20], Size = 20
 }
 
+// TLSDESC
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1, Defs = [R1] in {
+def PseudoLA_TLS_DESC_ABS : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src),
+                                   [], "la.tls.desc", "$dst, $src">,
+                                   Requires<[IsLA32, HasLaGlobalWithAbs]>;
+def PseudoLA_TLS_DESC_ABS_LARGE : Pseudo<(outs GPR:$dst),
+                                         (ins GPR:$tmp, bare_symbol:$src), [],
+                                         "la.tls.desc", "$dst, $src">,
+                                  Requires<[IsLA64, HasLaGlobalWithAbs]>;
+def PseudoLA_TLS_DESC_PC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+                                  "la.tls.desc", "$dst, $src">;
+}
+
+let isCall = 1, isBarrier = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0,
+    isCodeGenOnly = 0, isAsmParserOnly = 1, Defs = [R1, R4, R20], Size = 32 in
+def PseudoLA_TLS_DESC_PC_LARGE : Pseudo<(outs GPR:$dst),
+                                        (ins GPR:$tmp, bare_symbol:$src), [],
+                                        "la.tls.desc", "$dst, $tmp, $src">,
+                                 Requires<[IsLA64]>;
+
 // Load address inst alias: "la", "la.global" and "la.local".
 // Default:
 //     la = la.global = la.got
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
index 0d19d2b0fb1fe8..fb0587bf3bed7c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
@@ -114,6 +114,28 @@ enum Fixups {
   // 36-bit fixup corresponding to %call36(foo) for a pair instructions:
   // pcaddu18i+jirl.
   fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36,
+  // 20-bit fixup corresponding to %desc_pc_hi20(foo) for instruction pcalau12i.
+  fixup_loongarch_tls_desc_pc_hi20 =
+      FirstLiteralRelocationKind + ELF::R_LARCH_TLS_DESC_PC_HI20,
+  // 12-bit fixup corresponding to %desc_pc_lo12(foo) for instructions like
+  // addi.w/d.
+  fixup_loongarch_tls_desc_pc_lo12,
+  // 20-bit fixup corresponding to %desc64_pc_lo20(foo) for instruction lu32i.d.
+  fixup_loongarch_tls_desc64_pc_lo20,
+  // 12-bit fixup corresponding to %desc64_pc_hi12(foo) for instruction lu52i.d.
+  fixup_loongarch_tls_desc64_pc_hi12,
+  // 20-bit fixup corresponding to %desc_hi20(foo) for instruction lu12i.w.
+  fixup_loongarch_tls_desc_hi20,
+  // 12-bit fixup corresponding to %desc_lo12(foo) for instruction ori.
+  fixup_loongarch_tls_desc_lo12,
+  // 20-bit fixup corresponding to %desc64_lo20(foo) for instruction lu32i.d.
+  fixup_loongarch_tls_desc64_lo20,
+  // 12-bit fixup corresponding to %desc64_hi12(foo) for instruction lu52i.d.
+  fixup_loongarch_tls_desc64_hi12,
+  // 12-bit fixup corresponding to %desc_ld(foo) for instruction ld.w/d.
+  fixup_loongarch_tls_desc_ld,
+  // 12-bit fixup corresponding to %desc_call(foo) for instruction jirl.
+  fixup_loongarch_tls_desc_call,
 };
 } // end namespace LoongArch
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 9ac0128f251728..83812dc3c62a8b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -244,6 +244,36 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
     case LoongArchMCExpr::VK_LoongArch_CALL36:
       FixupKind = LoongArch::fixup_loongarch_call36;
       break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_HI20:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_pc_hi20;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_LO12:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_pc_lo12;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_LO20:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc64_pc_lo20;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_HI12:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc64_pc_hi12;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_HI20:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_hi20;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_LO12:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_lo12;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC64_LO20:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc64_lo20;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC64_HI12:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc64_hi12;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_ld;
+      break;
+    case LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL:
+      FixupKind = LoongArch::fixup_loongarch_tls_desc_call;
+      break;
     }
   } else if (Kind == MCExpr::SymbolRef &&
              cast<MCSymbolRefExpr>(Expr)->getKind() ==
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
index d6fa3b6e50968c..34f9bc65ec77c3 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
@@ -140,6 +140,26 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) {
     return "gd_hi20";
   case VK_LoongArch_CALL36:
     return "call36";
+  case VK_LoongArch_TLS_DESC_PC_HI20:
+    return "desc_pc_hi20";
+  case VK_LoongArch_TLS_DESC_PC_LO12:
+    return "desc_pc_lo12";
+  case VK_LoongArch_TLS_DESC64_PC_LO20:
+    return "desc64_pc_lo20";
+  case VK_LoongArch_TLS_DESC64_PC_HI12:
+    return "desc64_pc_hi12";
+  case VK_LoongArch_TLS_DESC_HI20:
+    return "desc_hi20";
+  case VK_LoongArch_TLS_DESC_LO12:
+    return "desc_lo12";
+  case VK_LoongArch_TLS_DESC64_LO20:
+    return "desc64_lo20";
+  case VK_LoongArch_TLS_DESC64_HI12:
+    return "desc64_hi12";
+  case VK_LoongArch_TLS_DESC_LD:
+    return "desc_ld";
+  case VK_LoongArch_TLS_DESC_CALL:
+    return "desc_call";
   }
 }
 
@@ -183,6 +203,16 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) {
       .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20)
       .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20)
       .Case("call36", VK_LoongArch_CALL36)
+      .Case("desc_pc_hi20", VK_LoongArch_TLS_DESC_PC_HI20)
+      .Case("desc_pc_lo12", VK_LoongArch_TLS_DESC_PC_LO12)
+      .Case("desc64_pc_lo20", VK_LoongArch_TLS_DESC64_PC_LO20)
+      .Case("desc64_pc_hi12", VK_LoongArch_TLS_DESC64_PC_HI12)
+      .Case("desc_hi20", VK_LoongArch_TLS_DESC_HI20)
+      .Case("desc_lo12", VK_LoongArch_TLS_DESC_LO12)
+      .Case("desc64_lo20", VK_LoongArch_TLS_DESC64_LO20)
+      .Case("desc64_hi12", VK_LoongArch_TLS_DESC64_HI12)
+      .Case("desc_ld", VK_LoongArch_TLS_DESC_LD)
+      .Case("desc_call", VK_LoongArch_TLS_DESC_CALL)
       .Default(VK_LoongArch_Invalid);
 }
 
@@ -223,6 +253,8 @@ void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
   case VK_LoongArch_TLS_LD_HI20:
   case VK_LoongArch_TLS_GD_PC_HI20:
   case VK_LoongArch_TLS_GD_HI20:
+  case VK_LoongArch_TLS_DESC_PC_HI20:
+  case VK_LoongArch_TLS_DESC_HI20:
     break;
   }
   fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
index bd828116d7fa46..71dd5bd14e4ee1 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
@@ -62,6 +62,16 @@ class LoongArchMCExpr : public MCTargetExpr {
     VK_LoongArch_TLS_GD_PC_HI20,
     VK_LoongArch_TLS_GD_HI20,
     VK_LoongArch_CALL36,
+    VK_LoongArch_TLS_DESC_PC_HI20,
+    VK_LoongArch_TLS_DESC_PC_LO12,
+    VK_LoongArch_TLS_DESC64_PC_LO20,
+    VK_LoongArch_TLS_DESC64_PC_HI12,
+    VK_LoongArch_TLS_DESC_HI20,
+    VK_LoongArch_TLS_DESC_LO12,
+    VK_LoongArch_TLS_DESC64_LO20,
+    VK_LoongArch_TLS_DESC64_HI12,
+    VK_LoongArch_TLS_DESC_LD,
+    VK_LoongArch_TLS_DESC_CALL,
     VK_LoongArch_Invalid // Must be the last item.
   };
 
diff --git a/llvm/test/MC/LoongArch/Macros/macros-la-bad.s b/llvm/test/MC/LoongArch/Macros/macros-la-bad.s
index 03c6355e40b092..29c9745e4ad864 100644
--- a/llvm/test/MC/LoongArch/Macros/macros-la-bad.s
+++ b/llvm/test/MC/LoongArch/Macros/macros-la-bad.s
@@ -11,3 +11,6 @@ la.abs $a0, $a1, sym
 
 la.pcrel $a0, $a0, sym
 # CHECK: :[[#@LINE-1]]:11: error: $rd must be different from $rj
+
+la.tls.desc $a1, sym
+# CHECK: :[[#@LINE-1]]:14: error: $rd must be $r4
diff --git a/llvm/test/MC/LoongArch/Macros/macros-la.s b/llvm/test/MC/LoongArch/Macros/macros-la.s
index 1a1d12d7d7dfd1..5c572c8e75a0f6 100644
--- a/llvm/test/MC/LoongArch/Macros/macros-la.s
+++ b/llvm/test/MC/LoongArch/Macros/macros-la.s
@@ -3,6 +3,8 @@
 # RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC
 # RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.relax
 # RUN: llvm-readobj -r %t.relax | FileCheck %s --check-prefixes=RELOC,RELAX
+# RUN: llvm-mc --triple=loongarch64 --defsym ABS=1 --mattr=+la-global-with-abs \
+# RUN:     %s | FileCheck %s --check-prefix=ABS
 
 # RELOC:      Relocations [
 # RELOC-NEXT:   Section ({{.*}}) .rela.text {
@@ -124,5 +126,48 @@ la.tls.gd $a0, $a1, sym_gd_large
 # RELOC-NEXT: R_LARCH_GOT64_PC_LO20 sym_gd_large 0x0
 # RELOC-NEXT: R_LARCH_GOT64_PC_HI12 sym_gd_large 0x0
 
+la.tls.desc $a0, sym_desc
+# CHECK-NEXT: pcalau12i $a0, %desc_pc_hi20(sym_desc)
+# CHECK-NEXT: addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
+# CHECK-NEXT: ld.d $ra, $a0, %desc_ld(sym_desc)
+# CHECK-NEXT: jirl $ra, $ra, %desc_call(sym_desc)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_DESC_PC_HI20 sym_desc 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC_PC_LO12 sym_desc 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC_LD sym_desc 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC_CALL sym_desc 0x0
+
+la.tls.desc $a0, $a1, sym_desc_large
+# CHECK-NEXT: pcalau12i $a0, %desc_pc_hi20(sym_desc_large)
+# CHECK-NEXT: addi.d $a1, $zero, %desc_pc_lo12(sym_desc_large)
+# CHECK-NEXT: lu32i.d $a1, %desc64_pc_lo20(sym_desc_large)
+# CHECK-NEXT: lu52i.d $a1, $a1, %desc64_pc_hi12(sym_desc_large)
+# CHECK-NEXT: add.d $a0, $a0, $a1
+# CHECK-NEXT: ld.d $ra, $a0, %desc_ld(sym_desc_large)
+# CHECK-NEXT: jirl $ra, $ra, %desc_call(sym_desc_large)
+# CHECK-EMPTY:
+# RELOC-NEXT: R_LARCH_TLS_DESC_PC_HI20 sym_desc_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC_PC_LO12 sym_desc_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC64_PC_LO20 sym_desc_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC64_PC_HI12 sym_desc_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC_LD sym_desc_large 0x0
+# RELOC-NEXT: R_LARCH_TLS_DESC_CALL sym_desc_large 0x0
+
+
 # RELOC-NEXT:   }
 # RELOC-NEXT: ]
+
+#############################################################
+## with feature: +la-global-with-abs
+#############################################################
+.ifdef ABS
+
+la.tls.desc $a0, sym_desc
+# ABS:      lu12i.w $a0, %desc_hi20(sym_desc)
+# ABS-NEXT: ori $a0, $a0, %desc_lo12(sym_desc)
+# ABS-NEXT: lu32i.d $a0, %desc64_lo20(sym_desc)
+# ABS-NEXT: lu52i.d $a0, $a0, %desc64_hi12(sym_desc)
+# ABS-NEXT: ld.d $ra, $a0, %desc_ld(sym_desc)
+# ABS-NEXT: jirl $ra, $ra, %desc_call(sym_desc)
+
+.endif
diff --git a/llvm/test/MC/LoongArch/Misc/tls-symbols.s b/llvm/test/MC/LoongArch/Misc/tls-symbols.s
index 2f91cbe004d27d..340fea29ed94a3 100644
--- a/llvm/test/MC/LoongArch/Misc/tls-symbols.s
+++ b/llvm/test/MC/LoongArch/Misc/tls-symbols.s
@@ -77,3 +77,25 @@ lu12i.w $a1, %le_hi20(le)
 # CHECK-NEXT:   Other: 0
 # CHECK-NEXT:   Section: Undefined
 # CHECK-NEXT: }
+
+pcalau12i $a1, %desc_pc_hi20(desc_pc)
+# CHECK-NEXT: Symbol {
+# CHECK-NEXT:   Name: desc_pc
+# CHECK-NEXT:   Value: 0x0
+# CHECK-NEXT:   Size: 0
+# CHECK-NEXT:   Binding: Global
+# CHECK-NEXT:   Type: TLS
+# CHECK-NEXT:   Other: 0
+# CHECK-NEXT:   Section: Undefined
+# CHECK-NEXT: }
+
+lu12i.w $a1, %desc_hi20(desc_abs)
+# CHECK-NEXT: Symbol {
+# CHECK-NEXT:   Name: desc_abs
+# CHECK-NEXT:   Value: 0x0
+# CHECK-NEXT:   Size: 0
+# CHECK-NEXT:   Binding: Global
+# CHECK-NEXT:   Type: TLS
+# CHECK-NEXT:   Other: 0
+# CHECK-NEXT:   Section: Undefined
+# CHECK-NEXT: }
diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s
index bec71e10389333..87df59978c6ea3 100644
--- a/llvm/test/MC/LoongArch/Relocations/relocations.s
+++ b/llvm/test/MC/LoongArch/Relocations/relocations.s
@@ -223,3 +223,53 @@ pcaddu18i $t1, %call36(foo)
 # RELOC: R_LARCH_CALL36 foo 0x0
 # INSTR: pcaddu18i $t1, %call36(foo)
 # FIXUP: fixup A - offset: 0, value: %call36(foo), kind: FK_NONE
+
+pcalau12i $t1, %desc_pc_hi20(foo)
+# RELOC: R_LARCH_TLS_DESC_PC_HI20 foo 0x0
+# INSTR: pcalau12i $t1, %desc_pc_hi20(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_pc_hi20(foo), kind: FK_NONE
+
+addi.d $t1, $t1, %desc_pc_lo12(foo)
+# RELOC: R_LARCH_TLS_DESC_PC_LO12 foo 0x0
+# INSTR: addi.d $t1, $t1, %desc_pc_lo12(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_pc_lo12(foo), kind: FK_NONE
+
+lu32i.d $t1, %desc64_pc_lo20(foo)
+# RELOC: R_LARCH_TLS_DESC64_PC_LO20 foo 0x0
+# INSTR: lu32i.d $t1, %desc64_pc_lo20(foo)
+# FIXUP: fixup A - offset: 0, value: %desc64_pc_lo20(foo), kind: FK_NONE
+
+lu52i.d $t1, $t1, %desc64_pc_hi12(foo)
+# RELOC: R_LARCH_TLS_DESC64_PC_HI12 foo 0x0
+# INSTR: lu52i.d $t1, $t1, %desc64_pc_hi12(foo)
+# FIXUP: fixup A - offset: 0, value: %desc64_pc_hi12(foo), kind: FK_NONE
+
+ld.d $ra, $t1, %desc_ld(foo)
+# RELOC: R_LARCH_TLS_DESC_LD foo 0x0
+# INSTR: ld.d $ra, $t1, %desc_ld(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_ld(foo), kind: FK_NONE
+
+jirl $ra, $ra, %desc_call(foo)
+# RELOC: R_LARCH_TLS_DESC_CALL foo 0x0
+# INSTR: jirl $ra, $ra, %desc_call(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_call(foo), kind: FK_NONE
+
+lu12i.w $t1, %desc_hi20(foo)
+# RELOC: R_LARCH_TLS_DESC_HI20 foo 0x0
+# INSTR: lu12i.w $t1, %desc_hi20(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_hi20(foo), kind: FK_NONE
+
+ori $t1, $t1, %desc_lo12(foo)
+# RELOC: R_LARCH_TLS_DESC_LO12 foo 0x0
+# INSTR: ori $t1, $t1, %desc_lo12(foo)
+# FIXUP: fixup A - offset: 0, value: %desc_lo12(foo), kind: FK_NONE
+
+lu32i.d $t1, %desc64_lo20(foo)
+# RELOC: R_LARCH_TLS_DESC64_LO20 foo 0x0
+# INSTR: lu32i.d $t1, %desc64_lo20(foo)
+# FIXUP: fixup A - offset: 0, value: %desc64_lo20(foo), kind: FK_NONE
+
+lu52i.d $t1, $t1, %desc64_hi12(foo)
+# RELOC: R_LARCH_TLS_DESC64_HI12 foo 0x0
+# INSTR: lu52i.d $t1, $t1, %desc64_hi12(foo)
+# FIXUP: fixup A - offset: 0, value: %desc64_hi12(foo), kind: FK_NONE

From ec527b21bb4196355184aa95ef31aa561b8e8b7b Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 13:42:26 +0800
Subject: [PATCH 29/65] [C++20] [Modules] Don't skip pragma diagnostic mappings

Close https://github.com/llvm/llvm-project/issues/75057

Previously, I thought the diagnostic mappings is not meaningful with
modules incorrectly. And this problem get revealed by another change
recently. So this patch tried to rever the previous "optimization"
partially.
---
 clang/lib/Frontend/FrontendActions.cpp  |  1 -
 clang/lib/Serialization/GeneratePCH.cpp |  1 -
 clang/test/Modules/pr75057.cppm         | 66 +++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Modules/pr75057.cppm

diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 04eb1041326713..480dfa8c975933 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -274,7 +274,6 @@ GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) {
   CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
   CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-  CI.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
 
   std::vector<std::unique_ptr<ASTConsumer>> Consumers =
       CreateMultiplexConsumer(CI, InFile);
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index bed74399098d7f..a2ddbe4624aae4 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -117,7 +117,6 @@ void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) {
       getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
   HSOpts.ModulesSkipHeaderSearchPaths = true;
-  HSOpts.ModulesSkipPragmaDiagnosticMappings = true;
 
   PCHGenerator::HandleTranslationUnit(Ctx);
 
diff --git a/clang/test/Modules/pr75057.cppm b/clang/test/Modules/pr75057.cppm
new file mode 100644
index 00000000000000..96781b3ccacc0b
--- /dev/null
+++ b/clang/test/Modules/pr75057.cppm
@@ -0,0 +1,66 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// Treat the behavior of using headers as baseline.
+// RUN: %clang_cc1 -std=c++20 %t/use-header.cc -isystem %t -fsyntax-only -verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t -fmodule-file=a=%t/a.pcm -fsyntax-only -verify
+
+// Test again with reduced BMI.
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -isystem %t -emit-reduced-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/use-module.cc -isystem %t -fmodule-file=a=%t/a.pcm -fsyntax-only -verify
+
+//--- sys.h
+#ifndef SYS_H
+#define SYS_H
+
+#pragma GCC system_header
+
+template <class C>
+struct [[deprecated]] iterator {};
+
+_Pragma("GCC diagnostic push")
+_Pragma("GCC diagnostic ignored \"-Wdeprecated\"")                         
+_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+
+template <class C>
+struct reverse_iterator 
+: public iterator<C> {};
+
+_Pragma("GCC diagnostic pop")
+
+template <class T>
+class C {
+public:
+    void i() {
+        reverse_iterator<T> i;
+    }
+};
+
+#endif
+
+//--- use-header.cc
+// expected-no-diagnostics
+// However, we see unexpected warnings
+#include <sys.h>
+
+void use() {
+    C<int>().i();
+}
+
+//--- a.cppm
+module;
+#include <sys.h>
+export module a;
+export using ::iterator;
+export using ::C;
+
+//--- use-module.cc
+// expected-no-diagnostics
+import a;
+
+void use() {
+    C<int>().i();
+}

From f4843acd839f4f8687815560b69fa96ed3cbf8cf Mon Sep 17 00:00:00 2001
From: Christian Sigg <chsigg@users.noreply.github.com>
Date: Tue, 30 Apr 2024 08:45:20 +0200
Subject: [PATCH 30/65] [lldb][bazel] Fix BUILD after
 975eca0e6a3459e59e96b0df33ea0cfbd157c597. (#90564)

---
 .../llvm-project-overlay/lldb/BUILD.bazel     | 23 ++++++++++++++++++-
 .../lldb/source/Plugins/BUILD.bazel           |  1 +
 .../llvm-project-overlay/llvm/BUILD.bazel     |  1 +
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
index 7958c6024875a5..b7b52f3ef59ce6 100644
--- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
@@ -183,6 +183,20 @@ cc_binary(
     ],
 )
 
+gentbl_cc_library(
+    name = "lldb-sbapi-dwarf-enums",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-lldb-sbapi-dwarf-enum"],
+            "include/lldb/API/SBLanguages.h",
+        ),
+    ],
+    tblgen = ":lldb-tblgen",
+    td_file = "//llvm:include/llvm/BinaryFormat/Dwarf.def",
+    deps = [],
+)
+
 cc_library(
     name = "API",
     srcs = glob([
@@ -192,6 +206,7 @@ cc_library(
     hdrs = glob(["include/lldb/API/**/*.h"]),
     strip_include_prefix = "include",
     deps = [
+        ":lldb-sbapi-dwarf-enums",
         ":Breakpoint",
         ":Commands",
         ":Core",
@@ -272,6 +287,7 @@ cc_library(
     hdrs = glob(["include/lldb/Expression/**/*.h"]),
     strip_include_prefix = "include",
     deps = [
+        ":lldb-sbapi-dwarf-enums",
         ":Core",
         ":Headers",
         ":Host",
@@ -280,6 +296,7 @@ cc_library(
         ":TargetHeaders",
         ":Utility",
         "//lldb/source/Plugins:PluginSymbolFileDWARFHeaders",
+        "//llvm:BinaryFormat",
         "//llvm:Core",
         "//llvm:DebugInfoDWARF",
         "//llvm:ExecutionEngine",
@@ -346,7 +363,10 @@ cc_library(
     name = "ExpressionHeaders",
     hdrs = glob(["include/lldb/Expression/**/*.h"]),
     strip_include_prefix = "include",
-    deps = ["//llvm:ExecutionEngine"],
+    deps = [
+        ":lldb-sbapi-dwarf-enums",
+        "//llvm:ExecutionEngine"
+    ],
 )
 
 cc_library(
@@ -673,6 +693,7 @@ cc_library(
         ":TargetProperties",
         ":Utility",
         "//lldb/source/Plugins:PluginProcessUtility",
+        "//llvm:BinaryFormat",
         "//llvm:MC",
         "//llvm:Support",
     ],
diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel
index 13fec77fe567bb..d705af9167d812 100644
--- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel
@@ -202,6 +202,7 @@ cc_library(
         "//lldb:SymbolHeaders",
         "//lldb:TargetHeaders",
         "//lldb:Utility",
+        "//llvm:BinaryFormat",
         "//llvm:Core",
         "//llvm:ExecutionEngine",
         "//llvm:IPO",
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index f45f057b63c226..c159204cede7e5 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -22,6 +22,7 @@ licenses(["notice"])
 exports_files([
     "LICENSE.TXT",
     "cmake/modules/llvm-driver-template.cpp.in",
+    "include/llvm/BinaryFormat/Dwarf.def",
     "include/llvm/CodeGen/SDNodeProperties.td",
     "include/llvm/CodeGen/ValueTypes.td",
     "include/llvm/Frontend/Directive/DirectiveBase.td",

From ce12b12d0d786773b60adead18e994d6e4a0e228 Mon Sep 17 00:00:00 2001
From: Kareem Ergawy <kareem.ergawy@amd.com>
Date: Tue, 30 Apr 2024 08:49:51 +0200
Subject: [PATCH 31/65] [mlir][OpenMP] Extend `omp.private` with a `dealloc`
 region (#90456)

Extends `omp.private` with a new region: `dealloc` where deallocation
logic for Fortran deallocatables will be outlined (this will happen in
later PRs).
---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 28 +++++++++++++++----
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 27 ++++++++++++++----
 mlir/test/Dialect/OpenMP/invalid.mlir         | 24 +++++++++++++++-
 mlir/test/Dialect/OpenMP/ops.mlir             | 15 ++++++++++
 4 files changed, 82 insertions(+), 12 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 8ab116ce391e29..a40676d071e620 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -158,8 +158,9 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove]> {
   let description = [{
     This operation provides a declaration of how to implement the
     [first]privatization of a variable. The dialect users should provide
-    information about how to create an instance of the type in the alloc region
-    and how to initialize the copy from the original item in the copy region.
+    information about how to create an instance of the type in the alloc region,
+    how to initialize the copy from the original item in the copy region, and if
+    needed, how to deallocate allocated memory in the dealloc region.
 
     Examples:
 
@@ -187,13 +188,26 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove]> {
     }
     ```
 
+    * `private(x)` for "allocatables" would be emitted as:
+    ```mlir
+    omp.private {type = private} @x.privatizer : !some.type alloc {
+    ^bb0(%arg0: !some.type):
+    %0 = ... allocate proper memory for the private clone ...
+    omp.yield(%0 : !fir.ref<i32>)
+    } dealloc {
+    ^bb0(%arg0: !some.type):
+    ... deallocate allocated memory ...
+    omp.yield
+    }
+    ```
+
     There are no restrictions on the body except for:
-    - The `alloc` region has a single argument.
+    - The `alloc` & `dealloc` regions have a single argument.
     - The `copy` region has 2 arguments.
-    - Both regions are terminated by `omp.yield` ops.
+    - All three regions are terminated by `omp.yield` ops.
     The above restrictions and other obvious restrictions (e.g. verifying the
     type of yielded values) are verified by the custom op verifier. The actual
-    contents of the blocks inside both regions are not verified.
+    contents of the blocks inside all regions are not verified.
 
     Instances of this op would then be used by ops that model directives that
     accept data-sharing attribute clauses.
@@ -212,12 +226,14 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove]> {
                        DataSharingClauseTypeAttr:$data_sharing_type);
 
   let regions = (region MinSizedRegion<1>:$alloc_region,
-                        AnyRegion:$copy_region);
+                        AnyRegion:$copy_region,
+                        AnyRegion:$dealloc_region);
 
   let assemblyFormat = [{
     $data_sharing_type $sym_name `:` $type
       `alloc` $alloc_region
       (`copy` $copy_region^)?
+      (`dealloc` $dealloc_region^)?
       attr-dict
   }];
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index f60668dd0cf995..0799090cdea981 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2258,7 +2258,8 @@ void PrivateClauseOp::build(OpBuilder &odsBuilder, OperationState &odsState,
 LogicalResult PrivateClauseOp::verify() {
   Type symType = getType();
 
-  auto verifyTerminator = [&](Operation *terminator) -> LogicalResult {
+  auto verifyTerminator = [&](Operation *terminator,
+                              bool yieldsValue) -> LogicalResult {
     if (!terminator->getBlock()->getSuccessors().empty())
       return success();
 
@@ -2269,6 +2270,14 @@ LogicalResult PrivateClauseOp::verify() {
     YieldOp yieldOp = llvm::cast<YieldOp>(terminator);
     TypeRange yieldedTypes = yieldOp.getResults().getTypes();
 
+    if (!yieldsValue) {
+      if (yieldedTypes.empty())
+        return success();
+
+      return mlir::emitError(terminator->getLoc())
+             << "Did not expect any values to be yielded.";
+    }
+
     if (yieldedTypes.size() == 1 && yieldedTypes.front() == symType)
       return success();
 
@@ -2285,7 +2294,8 @@ LogicalResult PrivateClauseOp::verify() {
   };
 
   auto verifyRegion = [&](Region &region, unsigned expectedNumArgs,
-                          StringRef regionName) -> LogicalResult {
+                          StringRef regionName,
+                          bool yieldsValue) -> LogicalResult {
     assert(!region.empty());
 
     if (region.getNumArguments() != expectedNumArgs)
@@ -2299,14 +2309,15 @@ LogicalResult PrivateClauseOp::verify() {
       if (!block.mightHaveTerminator())
         continue;
 
-      if (failed(verifyTerminator(block.getTerminator())))
+      if (failed(verifyTerminator(block.getTerminator(), yieldsValue)))
         return failure();
     }
 
     return success();
   };
 
-  if (failed(verifyRegion(getAllocRegion(), /*expectedNumArgs=*/1, "alloc")))
+  if (failed(verifyRegion(getAllocRegion(), /*expectedNumArgs=*/1, "alloc",
+                          /*yieldsValue=*/true)))
     return failure();
 
   DataSharingClauseType dsType = getDataSharingType();
@@ -2319,7 +2330,13 @@ LogicalResult PrivateClauseOp::verify() {
         "`firstprivate` clauses require both `alloc` and `copy` regions.");
 
   if (dsType == DataSharingClauseType::FirstPrivate &&
-      failed(verifyRegion(getCopyRegion(), /*expectedNumArgs=*/2, "copy")))
+      failed(verifyRegion(getCopyRegion(), /*expectedNumArgs=*/2, "copy",
+                          /*yieldsValue=*/true)))
+    return failure();
+
+  if (!getDeallocRegion().empty() &&
+      failed(verifyRegion(getDeallocRegion(), /*expectedNumArgs=*/1, "dealloc",
+                          /*yieldsValue=*/false)))
     return failure();
 
   return success();
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index e329b3010017cf..511e7d396c6875 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2151,6 +2151,17 @@ omp.private {type = private} @x.privatizer : i32 alloc {
 
 // -----
 
+omp.private {type = private} @x.privatizer : f32 alloc {
+^bb0(%arg0: f32):
+  omp.yield(%arg0 : f32)
+} dealloc {
+^bb0(%arg0: f32):
+  // expected-error @below {{Did not expect any values to be yielded.}}
+  omp.yield(%arg0 : f32)
+}
+
+// -----
+
 omp.private {type = private} @x.privatizer : i32 alloc {
 ^bb0(%arg0: i32):
   // expected-error @below {{expected exit block terminator to be an `omp.yield` op.}}
@@ -2178,6 +2189,17 @@ omp.private {type = firstprivate} @x.privatizer : f32 alloc {
 
 // -----
 
+// expected-error @below {{`dealloc`: expected 1 region arguments, got: 2}}
+omp.private {type = private} @x.privatizer : f32 alloc {
+^bb0(%arg0: f32):
+  omp.yield(%arg0 : f32)
+} dealloc {
+^bb0(%arg0: f32, %arg1: f32):
+  omp.yield
+}
+
+// -----
+
 // expected-error @below {{`private` clauses require only an `alloc` region.}}
 omp.private {type = private} @x.privatizer : f32 alloc {
 ^bb0(%arg0: f32):
@@ -2249,4 +2271,4 @@ func.func @undefined_privatizer(%arg0: !llvm.ptr) {
       omp.terminator
     }) : (!llvm.ptr) -> ()
   return
-}
\ No newline at end of file
+}
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index a012588f0b5521..60fc10f9d64b73 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -2492,11 +2492,22 @@ func.func @parallel_op_privatizers(%arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   return
 }
 
+// CHECK-LABEL: omp.private {type = private} @a.privatizer : !llvm.ptr alloc {
+omp.private {type = private} @a.privatizer : !llvm.ptr alloc {
+// CHECK: ^bb0(%{{.*}}: {{.*}}):
+^bb0(%arg0: !llvm.ptr):
+  omp.yield(%arg0 : !llvm.ptr)
+}
+
 // CHECK-LABEL: omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
 omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
 // CHECK: ^bb0(%{{.*}}: {{.*}}):
 ^bb0(%arg0: !llvm.ptr):
   omp.yield(%arg0 : !llvm.ptr)
+} dealloc {
+// CHECK: ^bb0(%{{.*}}: {{.*}}):
+^bb0(%arg0: !llvm.ptr):
+  omp.yield
 }
 
 // CHECK-LABEL: omp.private {type = firstprivate} @y.privatizer : !llvm.ptr alloc {
@@ -2509,6 +2520,10 @@ omp.private {type = firstprivate} @y.privatizer : !llvm.ptr alloc {
 // CHECK: ^bb0(%{{.*}}: {{.*}}, %{{.*}}: {{.*}}):
 ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
   omp.yield(%arg0 : !llvm.ptr)
+} dealloc {
+// CHECK: ^bb0(%{{.*}}: {{.*}}):
+^bb0(%arg0: !llvm.ptr):
+  omp.yield
 }
 
 // CHECK-LABEL: parallel_op_reduction_and_private

From 09f160c6298255f520b379b88161fbd1c365b308 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <balazs.keri@ericsson.com>
Date: Tue, 30 Apr 2024 09:01:45 +0200
Subject: [PATCH 32/65] [clang][analyzer] Move StreamChecker out of the alpha
 package. (#89247)

---
 clang/docs/analyzer/checkers.rst              | 186 +++++++++---------
 .../clang/StaticAnalyzer/Checkers/Checkers.td |  31 +--
 clang/test/Analysis/analyzer-config.c         |   2 +-
 .../test/Analysis/analyzer-enabled-checkers.c |   1 +
 ...c-library-functions-arg-enabled-checkers.c |   6 +-
 .../std-c-library-functions-arg-weakdeps.c    |   4 +-
 ...td-c-library-functions-vs-stream-checker.c |   8 +-
 clang/test/Analysis/stream-errno-note.c       |   4 +-
 clang/test/Analysis/stream-errno.c            |   4 +-
 clang/test/Analysis/stream-error.c            |   4 +-
 clang/test/Analysis/stream-invalidate.c       |   2 +-
 .../test/Analysis/stream-non-posix-function.c |   2 +-
 clang/test/Analysis/stream-noopen.c           |   2 +-
 clang/test/Analysis/stream-note.c             |   8 +-
 clang/test/Analysis/stream-pedantic.c         |   8 +-
 .../Analysis/stream-stdlibraryfunctionargs.c  |  10 +-
 clang/test/Analysis/stream.c                  |  16 +-
 clang/test/Analysis/stream.cpp                |   2 +-
 clang/www/analyzer/alpha_checks.html          |  57 ------
 clang/www/analyzer/open_projects.html         |  16 --
 20 files changed, 151 insertions(+), 222 deletions(-)

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index fb748d23a53d01..0d87df36ced0e9 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1462,6 +1462,99 @@ checker).
 
 Default value of the option is ``true``.
 
+.. _unix-Stream:
+
+unix.Stream (C)
+"""""""""""""""
+Check C stream handling functions:
+``fopen, fdopen, freopen, tmpfile, fclose, fread, fwrite, fgetc, fgets, fputc, fputs, fprintf, fscanf, ungetc, getdelim, getline, fseek, fseeko, ftell, ftello, fflush, rewind, fgetpos, fsetpos, clearerr, feof, ferror, fileno``.
+
+The checker maintains information about the C stream objects (``FILE *``) and
+can detect error conditions related to use of streams. The following conditions
+are detected:
+
+* The ``FILE *`` pointer passed to the function is NULL (the single exception is
+  ``fflush`` where NULL is allowed).
+* Use of stream after close.
+* Opened stream is not closed.
+* Read from a stream after end-of-file. (This is not a fatal error but reported
+  by the checker. Stream remains in EOF state and the read operation fails.)
+* Use of stream when the file position is indeterminate after a previous failed
+  operation. Some functions (like ``ferror``, ``clearerr``, ``fseek``) are
+  allowed in this state.
+* Invalid 3rd ("``whence``") argument to ``fseek``.
+
+The stream operations are by this checker usually split into two cases, a success
+and a failure case. However, in the case of write operations (like ``fwrite``,
+``fprintf`` and even ``fsetpos``) this behavior could produce a large amount of
+unwanted reports on projects that don't have error checks around the write
+operations, so by default the checker assumes that write operations always succeed.
+This behavior can be controlled by the ``Pedantic`` flag: With
+``-analyzer-config unix.Stream:Pedantic=true`` the checker will model the
+cases where a write operation fails and report situations where this leads to
+erroneous behavior. (The default is ``Pedantic=false``, where write operations
+are assumed to succeed.)
+
+.. code-block:: c
+
+ void test1() {
+   FILE *p = fopen("foo", "r");
+ } // warn: opened file is never closed
+
+ void test2() {
+   FILE *p = fopen("foo", "r");
+   fseek(p, 1, SEEK_SET); // warn: stream pointer might be NULL
+   fclose(p);
+ }
+
+ void test3() {
+   FILE *p = fopen("foo", "r");
+   if (p) {
+     fseek(p, 1, 3); // warn: third arg should be SEEK_SET, SEEK_END, or SEEK_CUR
+     fclose(p);
+   }
+ }
+
+ void test4() {
+   FILE *p = fopen("foo", "r");
+   if (!p)
+     return;
+
+   fclose(p);
+   fclose(p); // warn: stream already closed
+ }
+
+ void test5() {
+   FILE *p = fopen("foo", "r");
+   if (!p)
+     return;
+
+   fgetc(p);
+   if (!ferror(p))
+     fgetc(p); // warn: possible read after end-of-file
+
+   fclose(p);
+ }
+
+ void test6() {
+   FILE *p = fopen("foo", "r");
+   if (!p)
+     return;
+
+   fgetc(p);
+   if (!feof(p))
+     fgetc(p); // warn: file position may be indeterminate after I/O error
+
+   fclose(p);
+ }
+
+**Limitations**
+
+The checker does not track the correspondence between integer file descriptors
+and ``FILE *`` pointers. Operations on standard streams like ``stdin`` are not
+treated specially and are therefore often not recognized (because these streams
+are usually not opened explicitly by the program, and are global variables).
+
 .. _osx-checkers:
 
 osx
@@ -3116,99 +3209,6 @@ Check for misuses of stream APIs. Check for misuses of stream APIs: ``fopen, fcl
    fclose(F); // warn: closing a previously closed file stream
  }
 
-.. _alpha-unix-Stream:
-
-alpha.unix.Stream (C)
-"""""""""""""""""""""
-Check C stream handling functions:
-``fopen, fdopen, freopen, tmpfile, fclose, fread, fwrite, fgetc, fgets, fputc, fputs, fprintf, fscanf, ungetc, getdelim, getline, fseek, fseeko, ftell, ftello, fflush, rewind, fgetpos, fsetpos, clearerr, feof, ferror, fileno``.
-
-The checker maintains information about the C stream objects (``FILE *``) and
-can detect error conditions related to use of streams. The following conditions
-are detected:
-
-* The ``FILE *`` pointer passed to the function is NULL (the single exception is
-  ``fflush`` where NULL is allowed).
-* Use of stream after close.
-* Opened stream is not closed.
-* Read from a stream after end-of-file. (This is not a fatal error but reported
-  by the checker. Stream remains in EOF state and the read operation fails.)
-* Use of stream when the file position is indeterminate after a previous failed
-  operation. Some functions (like ``ferror``, ``clearerr``, ``fseek``) are
-  allowed in this state.
-* Invalid 3rd ("``whence``") argument to ``fseek``.
-
-The stream operations are by this checker usually split into two cases, a success
-and a failure case. However, in the case of write operations (like ``fwrite``,
-``fprintf`` and even ``fsetpos``) this behavior could produce a large amount of
-unwanted reports on projects that don't have error checks around the write
-operations, so by default the checker assumes that write operations always succeed.
-This behavior can be controlled by the ``Pedantic`` flag: With
-``-analyzer-config alpha.unix.Stream:Pedantic=true`` the checker will model the
-cases where a write operation fails and report situations where this leads to
-erroneous behavior. (The default is ``Pedantic=false``, where write operations
-are assumed to succeed.)
-
-.. code-block:: c
-
- void test1() {
-   FILE *p = fopen("foo", "r");
- } // warn: opened file is never closed
-
- void test2() {
-   FILE *p = fopen("foo", "r");
-   fseek(p, 1, SEEK_SET); // warn: stream pointer might be NULL
-   fclose(p);
- }
-
- void test3() {
-   FILE *p = fopen("foo", "r");
-   if (p) {
-     fseek(p, 1, 3); // warn: third arg should be SEEK_SET, SEEK_END, or SEEK_CUR
-     fclose(p);
-   }
- }
-
- void test4() {
-   FILE *p = fopen("foo", "r");
-   if (!p)
-     return;
-
-   fclose(p);
-   fclose(p); // warn: stream already closed
- }
-
- void test5() {
-   FILE *p = fopen("foo", "r");
-   if (!p)
-     return;
-
-   fgetc(p);
-   if (!ferror(p))
-     fgetc(p); // warn: possible read after end-of-file
-
-   fclose(p);
- }
-
- void test6() {
-   FILE *p = fopen("foo", "r");
-   if (!p)
-     return;
-
-   fgetc(p);
-   if (!feof(p))
-     fgetc(p); // warn: file position may be indeterminate after I/O error
-
-   fclose(p);
- }
-
-**Limitations**
-
-The checker does not track the correspondence between integer file descriptors
-and ``FILE *`` pointers. Operations on standard streams like ``stdin`` are not
-treated specially and are therefore often not recognized (because these streams
-are usually not opened explicitly by the program, and are global variables).
-
 .. _alpha-unix-cstring-BufferOverlap:
 
 alpha.unix.cstring.BufferOverlap (C)
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 9aa1c6ddfe4492..520286b57c9fdc 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -563,6 +563,20 @@ def MismatchedDeallocatorChecker : Checker<"MismatchedDeallocator">,
   Dependencies<[DynamicMemoryModeling]>,
   Documentation<HasDocumentation>;
 
+// This must appear before StdCLibraryFunctionsChecker because a dependency.
+def StreamChecker : Checker<"Stream">,
+  HelpText<"Check stream handling functions">,
+  WeakDependencies<[NonNullParamChecker]>,
+  CheckerOptions<[
+    CmdLineOption<Boolean,
+                  "Pedantic",
+                  "If false, assume that stream operations which are often not "
+                  "checked for error do not fail.",
+                  "false",
+                  InAlpha>
+  ]>,
+  Documentation<HasDocumentation>;
+
 def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">,
   HelpText<"Check for invalid arguments of C standard library functions, "
            "and apply relations between arguments and return value">,
@@ -581,7 +595,7 @@ def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">,
                   "true",
                   InAlpha>
   ]>,
-  WeakDependencies<[CallAndMessageChecker, NonNullParamChecker]>,
+  WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>,
   Documentation<HasDocumentation>;
 
 def VforkChecker : Checker<"Vfork">,
@@ -601,20 +615,6 @@ def PthreadLockChecker : Checker<"PthreadLock">,
   Dependencies<[PthreadLockBase]>,
   Documentation<HasDocumentation>;
 
-def StreamChecker : Checker<"Stream">,
-  HelpText<"Check stream handling functions">,
-  WeakDependencies<[NonNullParamChecker]>,
-  CheckerOptions<[
-    CmdLineOption<Boolean,
-                  "Pedantic",
-                  "If false, assume that stream operations which are often not "
-                  "checked for error do not fail."
-                  "fail.",
-                  "false",
-                  InAlpha>
-  ]>,
-  Documentation<HasDocumentation>;
-
 def SimpleStreamChecker : Checker<"SimpleStream">,
   HelpText<"Check for misuses of stream APIs">,
   Documentation<HasDocumentation>;
@@ -1628,6 +1628,7 @@ def TaintTesterChecker : Checker<"TaintTest">,
 def StreamTesterChecker : Checker<"StreamTester">,
   HelpText<"Add test functions to StreamChecker for test and debugging "
            "purposes.">,
+  WeakDependencies<[StreamChecker]>,
   Documentation<NotDocumented>;
 
 def ErrnoTesterChecker : Checker<"ErrnoTest">,
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 23e37a856d09f7..fda920fa3951e0 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -12,7 +12,6 @@
 // CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtExec = 0x04
 // CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtRead = 0x01
 // CHECK-NEXT: alpha.security.taint.TaintPropagation:Config = ""
-// CHECK-NEXT: alpha.unix.Stream:Pedantic = false
 // CHECK-NEXT: apply-fixits = false
 // CHECK-NEXT: assume-controlled-environment = false
 // CHECK-NEXT: avoid-suppressing-null-argument-paths = false
@@ -131,6 +130,7 @@
 // CHECK-NEXT: unix.Errno:AllowErrnoReadOutsideConditionExpressions = true
 // CHECK-NEXT: unix.StdCLibraryFunctions:DisplayLoadedSummaries = false
 // CHECK-NEXT: unix.StdCLibraryFunctions:ModelPOSIX = true
+// CHECK-NEXT: unix.Stream:Pedantic = false
 // CHECK-NEXT: unroll-loops = false
 // CHECK-NEXT: verbose-report-filename = false
 // CHECK-NEXT: widen-loops = false
diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c
index 2c4e34f4990bf5..9543ba8ec02fc1 100644
--- a/clang/test/Analysis/analyzer-enabled-checkers.c
+++ b/clang/test/Analysis/analyzer-enabled-checkers.c
@@ -48,6 +48,7 @@
 // CHECK-NEXT: unix.Malloc
 // CHECK-NEXT: unix.MallocSizeof
 // CHECK-NEXT: unix.MismatchedDeallocator
+// CHECK-NEXT: unix.Stream
 // CHECK-NEXT: unix.StdCLibraryFunctions
 // CHECK-NEXT: unix.Vfork
 // CHECK-NEXT: unix.cstring.BadSizeArg
diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c
index d4721c0a59a3d0..14aca5a948bf4b 100644
--- a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c
+++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c
@@ -6,7 +6,7 @@
 // RUN:   -Xclang -analyzer-checker=unix.StdCLibraryFunctions \
 // RUN:   -Xclang -analyzer-config \
 // RUN:      -Xclang unix.StdCLibraryFunctions:ModelPOSIX=true \
-// RUN:   -Xclang -analyzer-checker=alpha.unix.Stream \
+// RUN:   -Xclang -analyzer-checker=unix.Stream \
 // RUN:   -Xclang -analyzer-list-enabled-checkers \
 // RUN:   -Xclang -analyzer-display-progress \
 // RUN:   2>&1 | FileCheck %s --implicit-check-not=ANALYZE \
@@ -14,8 +14,6 @@
 
 // CHECK:      OVERVIEW: Clang Static Analyzer Enabled Checkers List
 // CHECK-EMPTY:
-// CHECK-NEXT: core.NonNullParamChecker
-// CHECK-NEXT: alpha.unix.Stream
 // CHECK-NEXT: apiModeling.Errno
 // CHECK-NEXT: apiModeling.TrustNonnull
 // CHECK-NEXT: apiModeling.TrustReturnsNonnull
@@ -26,6 +24,7 @@
 // CHECK-NEXT: core.CallAndMessage
 // CHECK-NEXT: core.DivideZero
 // CHECK-NEXT: core.DynamicTypePropagation
+// CHECK-NEXT: core.NonNullParamChecker
 // CHECK-NEXT: core.NonnilStringConstants
 // CHECK-NEXT: core.NullDereference
 // CHECK-NEXT: core.StackAddrEscapeBase
@@ -57,6 +56,7 @@
 // CHECK-NEXT: unix.Malloc
 // CHECK-NEXT: unix.MallocSizeof
 // CHECK-NEXT: unix.MismatchedDeallocator
+// CHECK-NEXT: unix.Stream
 // CHECK-NEXT: unix.StdCLibraryFunctions
 // CHECK-NEXT: unix.Vfork
 // CHECK-NEXT: unix.cstring.BadSizeArg
diff --git a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c
index 5df5a770015b50..1f0d3627fae340 100644
--- a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c
+++ b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c
@@ -3,7 +3,7 @@
 
 // RUN: %clang_analyze_cc1 %s \
 // RUN:   -analyzer-checker=core \
-// RUN:   -analyzer-checker=alpha.unix.Stream \
+// RUN:   -analyzer-checker=unix.Stream \
 // RUN:   -analyzer-checker=unix.StdCLibraryFunctions \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \
 // RUN:   -triple x86_64-unknown-linux-gnu \
@@ -57,5 +57,5 @@ void test_notnull_arg(FILE *F) {
 
 void test_notnull_stream_arg(void) {
   fileno(0); // \
-  // expected-warning{{Stream pointer might be NULL [alpha.unix.Stream]}}
+  // expected-warning{{Stream pointer might be NULL [unix.Stream]}}
 }
diff --git a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c
index cac3fe5c5151cd..b99cc30149c912 100644
--- a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c
+++ b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c
@@ -1,7 +1,7 @@
 // Check the case when only the StreamChecker is enabled.
 // RUN: %clang_analyze_cc1 %s \
-// RUN:   -analyzer-checker=core,alpha.unix.Stream \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN:   -analyzer-checker=core,unix.Stream \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false \
 // RUN:   -triple x86_64-unknown-linux \
@@ -19,8 +19,8 @@
 // Check the case when both the StreamChecker and the
 // StdLibraryFunctionsChecker are enabled.
 // RUN: %clang_analyze_cc1 %s \
-// RUN:   -analyzer-checker=core,alpha.unix.Stream \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN:   -analyzer-checker=core,unix.Stream \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-checker=unix.StdCLibraryFunctions \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \
 // RUN:   -analyzer-checker=debug.ExprInspection \
diff --git a/clang/test/Analysis/stream-errno-note.c b/clang/test/Analysis/stream-errno-note.c
index fb12f0bace937f..71ea026ed4de33 100644
--- a/clang/test/Analysis/stream-errno-note.c
+++ b/clang/test/Analysis/stream-errno-note.c
@@ -1,6 +1,6 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core \
-// RUN:   -analyzer-checker=alpha.unix.Stream \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN:   -analyzer-checker=unix.Stream \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-checker=unix.Errno \
 // RUN:   -analyzer-checker=unix.StdCLibraryFunctions \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \
diff --git a/clang/test/Analysis/stream-errno.c b/clang/test/Analysis/stream-errno.c
index 08382eaf6abf9f..b28cc301a4ec25 100644
--- a/clang/test/Analysis/stream-errno.c
+++ b/clang/test/Analysis/stream-errno.c
@@ -1,5 +1,5 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.Errno,unix.StdCLibraryFunctions,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Stream,unix.Errno,unix.StdCLibraryFunctions,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify %s
 
 #include "Inputs/system-header-simulator.h"
diff --git a/clang/test/Analysis/stream-error.c b/clang/test/Analysis/stream-error.c
index 2abf4b900a047f..3f791d13346419 100644
--- a/clang/test/Analysis/stream-error.c
+++ b/clang/test/Analysis/stream-error.c
@@ -1,7 +1,7 @@
 // RUN: %clang_analyze_cc1 -verify %s \
 // RUN: -analyzer-checker=core \
-// RUN: -analyzer-checker=alpha.unix.Stream \
-// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN: -analyzer-checker=unix.Stream \
+// RUN: -analyzer-config unix.Stream:Pedantic=true \
 // RUN: -analyzer-checker=debug.StreamTester \
 // RUN: -analyzer-checker=debug.ExprInspection
 
diff --git a/clang/test/Analysis/stream-invalidate.c b/clang/test/Analysis/stream-invalidate.c
index 5046a356d0583d..749c53d164fb5c 100644
--- a/clang/test/Analysis/stream-invalidate.c
+++ b/clang/test/Analysis/stream-invalidate.c
@@ -1,6 +1,6 @@
 // RUN: %clang_analyze_cc1 -verify %s \
 // RUN: -analyzer-checker=core \
-// RUN: -analyzer-checker=alpha.unix.Stream \
+// RUN: -analyzer-checker=unix.Stream \
 // RUN: -analyzer-checker=debug.ExprInspection
 
 #include "Inputs/system-header-simulator.h"
diff --git a/clang/test/Analysis/stream-non-posix-function.c b/clang/test/Analysis/stream-non-posix-function.c
index 091d95a573ddf9..b9ece31cde1568 100644
--- a/clang/test/Analysis/stream-non-posix-function.c
+++ b/clang/test/Analysis/stream-non-posix-function.c
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -fno-builtin -analyzer-checker=core,alpha.unix.Stream -verify %s
+// RUN: %clang_analyze_cc1 -fno-builtin -analyzer-checker=core,unix.Stream -verify %s
 // RUN: %clang_analyze_cc1 -fno-builtin -analyzer-checker=core,alpha.unix.SimpleStream -verify %s
 
 // expected-no-diagnostics
diff --git a/clang/test/Analysis/stream-noopen.c b/clang/test/Analysis/stream-noopen.c
index 644c699d05e246..87761b3afb76b4 100644
--- a/clang/test/Analysis/stream-noopen.c
+++ b/clang/test/Analysis/stream-noopen.c
@@ -1,7 +1,7 @@
 // RUN: %clang_analyze_cc1 -verify %s \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.Errno \
-// RUN:   -analyzer-checker=alpha.unix.Stream \
+// RUN:   -analyzer-checker=unix.Stream \
 // RUN:   -analyzer-checker=unix.StdCLibraryFunctions \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \
 // RUN:   -analyzer-checker=debug.ExprInspection
diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c
index 03a8ff4e468f66..3aef707d50056e 100644
--- a/clang/test/Analysis/stream-note.c
+++ b/clang/test/Analysis/stream-note.c
@@ -1,8 +1,8 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream -analyzer-output text \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Stream -analyzer-output text \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -verify %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.StdCLibraryFunctions -analyzer-output text \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Stream,unix.StdCLibraryFunctions -analyzer-output text \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=expected,stdargs %s
 
 #include "Inputs/system-header-simulator.h"
diff --git a/clang/test/Analysis/stream-pedantic.c b/clang/test/Analysis/stream-pedantic.c
index 2bbea81d47ef60..2a3dff86789030 100644
--- a/clang/test/Analysis/stream-pedantic.c
+++ b/clang/test/Analysis/stream-pedantic.c
@@ -1,8 +1,8 @@
-// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=false -verify=nopedantic %s
+// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=false -verify=nopedantic %s
 
-// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true -verify=pedantic %s
+// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true -verify=pedantic %s
 
 #include "Inputs/system-header-simulator.h"
 
diff --git a/clang/test/Analysis/stream-stdlibraryfunctionargs.c b/clang/test/Analysis/stream-stdlibraryfunctionargs.c
index 2ea6a8c472c613..c3a6302f9c7afc 100644
--- a/clang/test/Analysis/stream-stdlibraryfunctionargs.c
+++ b/clang/test/Analysis/stream-stdlibraryfunctionargs.c
@@ -1,13 +1,13 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.StdCLibraryFunctions,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Stream,unix.StdCLibraryFunctions,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s
 
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s
 
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true \
 // RUN:   -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stdfunc,any %s
 
 #include "Inputs/system-header-simulator.h"
diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c
index 93ed555c89ebd1..db03d90cd8af4e 100644
--- a/clang/test/Analysis/stream.c
+++ b/clang/test/Analysis/stream.c
@@ -1,11 +1,11 @@
-// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s
-// RUN: %clang_analyze_cc1 -triple=armv8-none-linux-eabi -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s
-// RUN: %clang_analyze_cc1 -triple=aarch64-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s
-// RUN: %clang_analyze_cc1 -triple=hexagon -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \
-// RUN:   -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s
+// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true -verify %s
+// RUN: %clang_analyze_cc1 -triple=armv8-none-linux-eabi -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true -verify %s
+// RUN: %clang_analyze_cc1 -triple=aarch64-linux-gnu -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true -verify %s
+// RUN: %clang_analyze_cc1 -triple=hexagon -analyzer-checker=core,unix.Stream,debug.ExprInspection \
+// RUN:   -analyzer-config unix.Stream:Pedantic=true -verify %s
 
 #include "Inputs/system-header-simulator.h"
 #include "Inputs/system-header-simulator-for-malloc.h"
diff --git a/clang/test/Analysis/stream.cpp b/clang/test/Analysis/stream.cpp
index 7eca505bcaf5d9..8c65d9c287fdaf 100644
--- a/clang/test/Analysis/stream.cpp
+++ b/clang/test/Analysis/stream.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.Stream -verify %s
 
 typedef struct _IO_FILE FILE;
 extern FILE *fopen(const char *path, const char *mode);
diff --git a/clang/www/analyzer/alpha_checks.html b/clang/www/analyzer/alpha_checks.html
index f040d1957b0f98..2c8eece41fb2fc 100644
--- a/clang/www/analyzer/alpha_checks.html
+++ b/clang/www/analyzer/alpha_checks.html
@@ -910,63 +910,6 @@ <h3 id="unix_alpha_checkers">Unix Alpha Checkers</h3>
 </pre></div></div></td></tr>
 
 
-<tr><td><a id="alpha.unix.Stream"><div class="namedescr expandable"><span class="name">
-alpha.unix.Stream</span><span class="lang">
-(C)</span><div class="descr">
-Check stream handling functions:<div class=functions>fopen<br>
-tmpfile<br>
-fclose<br>
-fread<br>
-fwrite<br>
-fseek<br>
-ftell<br>
-rewind<br>
-fgetpos<br>
-fsetpos<br>
-clearerr<br>
-feof<br>
-ferror<br>
-fileno</div></div></div></a></td>
-<td><div class="exampleContainer expandable">
-<div class="example"><pre>
-void test() {
-  FILE *p = fopen("foo", "r");
-} // warn: opened file is never closed
-</pre></div><div class="separator"></div>
-<div class="example"><pre>
-void test() {
-  FILE *p = fopen("foo", "r");
-  fseek(p, 1, SEEK_SET); // warn: stream pointer might be NULL
-  fclose(p);
-}
-</pre></div><div class="separator"></div>
-<div class="example"><pre>
-void test() {
-  FILE *p = fopen("foo", "r");
-
-  if (p)
-    fseek(p, 1, 3);
-     // warn: third arg should be SEEK_SET, SEEK_END, or SEEK_CUR
-
-  fclose(p);
-}
-</pre></div><div class="separator"></div>
-<div class="example"><pre>
-void test() {
-  FILE *p = fopen("foo", "r");
-  fclose(p);
-  fclose(p); // warn: already closed
-}
-</pre></div><div class="separator"></div>
-<div class="example"><pre>
-void test() {
-  FILE *p = tmpfile();
-  ftell(p); // warn: stream pointer might be NULL
-  fclose(p);
-}
-</pre></div></div></td></tr>
-
-
 <tr><td><a id="alpha.unix.cstring.BufferOverlap"><div class="namedescr expandable"><span class="name">
 alpha.unix.cstring.BufferOverlap</span><span class="lang">
 (C)</span><div class="descr">
diff --git a/clang/www/analyzer/open_projects.html b/clang/www/analyzer/open_projects.html
index 8e06ce2cb52884..a7c99c6e25bb3c 100644
--- a/clang/www/analyzer/open_projects.html
+++ b/clang/www/analyzer/open_projects.html
@@ -47,22 +47,6 @@ <h1>Open Projects</h1>
       <a href="https://en.wikipedia.org/wiki/Taint_checking">tainted</a> index values.
       <p><i>(Difficulty: Medium)</i></p></p>
       </li>
-
-      <li><code>alpha.unix.StreamChecker</code>
-        <p>A SimpleStreamChecker has been presented in the Building a Checker in 24
-        Hours talk
-        (<a href="https://llvm.org/devmtg/2012-11/Zaks-Rose-Checker24Hours.pdf">slides</a>
-        <a href="https://youtu.be/kdxlsP5QVPw">video</a>).</p>
-
-        <p>This alpha checker is an attempt to write a production grade stream checker.
-        However, it was found to have an unacceptably high false positive rate.
-        One of the found problems was that eagerly splitting the state
-        based on whether the system call may fail leads to too many reports.
-        A <em>delayed</em> split where the implication is stored in the state
-        (similarly to nullability implications in <code>TrustNonnullChecker</code>)
-        may produce much better results.</p>
-        <p><i>(Difficulty: Medium)</i></p>
-      </li>
     </ul>
   </li>
 

From ff6c0cac7037e78688f589fd635c82e7c1cb42b0 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Tue, 30 Apr 2024 08:14:12 +0100
Subject: [PATCH 33/65] [lldb][Docs] Remove more subtitles from packets doc
 (#90443)

This removes various subtitles or converts them to bold text so that the
table of contents is less cluttered.

This includes "Example", "Notes", "Priority To Implement" and
"Response".
---
 lldb/docs/resources/lldbgdbremote.md | 420 ++++++++++-----------------
 1 file changed, 146 insertions(+), 274 deletions(-)

diff --git a/lldb/docs/resources/lldbgdbremote.md b/lldb/docs/resources/lldbgdbremote.md
index a9fa2a432b7009..aaf903c9a5d13b 100644
--- a/lldb/docs/resources/lldbgdbremote.md
+++ b/lldb/docs/resources/lldbgdbremote.md
@@ -42,10 +42,8 @@ read packet: $OK#9a
 send packet: +
 ```
 
-### Priority To Implement
-
-High. Any GDB remote server that can implement this should if the
-connection is reliable. This improves packet throughput and increases
+**Priority To Implement:** High. Any GDB remote server that can implement this
+should if the connection is reliable. This improves packet throughput and increases
 the performance of the connection.
 
 ## QSupported
@@ -84,9 +82,7 @@ In the example above, three lldb extensions are shown:
       watchpoints, up to a pointer size, `sizeof(void*)`, a reasonable
       baseline assumption.
 
-### Priority To Implement
-
-Optional.
+**Priority To Implement:** Optional
 
 ## "A" - launch args packet
 
@@ -111,10 +107,8 @@ The above packet helps when you have remote debugging abilities where you
 could launch a process on a remote host, this isn't needed for bare board
 debugging.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to launch a target after
-making a connection to a GDB server that isn't already connected to
+**Priority To Implement:** Low. Only needed if the remote target wants to launch
+a target after making a connection to a GDB server that isn't already connected to
 an inferior process.
 
 ## qLaunchSuccess
@@ -125,9 +119,8 @@ Returns the status of the last attempt to launch a process.
 Either `OK` if no error ocurred, or `E` followed by a string
 describing the error.
 
-### Priority To Implement
-
-High, launching processes is a key part of LLDB's platform mode.
+**Priority To Implement:** High, launching processes is a key part of LLDB's
+platform mode.
 
 ## QEnvironment:NAME=VALUE
 
@@ -150,10 +143,8 @@ read packet: $OK#00
 ```
 This packet can be sent one or more times _prior_ to sending a "A" packet.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to launch a target after
-making a connection to a GDB server that isn't already connected to
+**Priority To Implement:** Low. Only needed if the remote target wants to launch
+a target after making a connection to a GDB server that isn't already connected to
 an inferior process.
 
 ## QEnvironmentHexEncoded:HEX-ENCODING(NAME=VALUE)
@@ -173,18 +164,23 @@ read packet: $OK#00
 ```
 This packet can be sent one or more times _prior_ to sending a "A" packet.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to launch a target after
-making a connection to a GDB server that isn't already connected to
+**Priority To Implement:** Low. Only needed if the remote target wants to launch
+a target after making a connection to a GDB server that isn't already connected to
 an inferior process.
 
 ## QEnableErrorStrings
 
 This packet enables reporting of Error strings in remote packet
 replies from the server to client. If the server supports this
-feature, it should send an OK response. The client can expect the
-following error replies if this feature is enabled in the server:
+feature, it should send an OK response.
+
+```
+send packet: $QEnableErrorStrings
+read packet: $OK#00
+```
+
+The client can expect the following error replies if this feature is enabled in
+the server:
 ```
 EXX;AAAAAAAAA
 ```
@@ -195,17 +191,8 @@ It must be noted that even if the client has enabled reporting
 strings in error replies, it must not expect error strings to all
 error replies.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to provide strings that
-are human readable along with an error code.
-
-### Example
-
-```
-send packet: $QEnableErrorStrings
-read packet: $OK#00
-```
+**Priority To Implement:** Low. Only needed if the remote target wants to
+provide strings that are human readable along with an error code.
 
 ## QSetSTDIN:\<ascii-hex-path\> / QSetSTDOUT:\<ascii-hex-path\> / QSetSTDERR:\<ascii-hex-path\>
 
@@ -221,13 +208,10 @@ QSetSTDERR:<ascii-hex-path>
 ```
 These packets must be sent  _prior_ to sending a "A" packet.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to launch a target after
-making a connection to a GDB server that isn't already connected to
+**Priority To Implement:** Low. Only needed if the remote target wants to launch
+a target after making a connection to a GDB server that isn't already connected to
 an inferior process.
 
-
 ## QSetWorkingDir:\<ascii-hex-path\>
 
 Set the working directory prior to sending an "A" packet.
@@ -238,10 +222,8 @@ QSetWorkingDir:<ascii-hex-path>
 ```
 This packet must be sent  _prior_ to sending a "A" packet.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to launch a target after
-making a connection to a GDB server that isn't already connected to
+**Priority To Implement:** Low. Only needed if the remote target wants to launch
+a target after making a connection to a GDB server that isn't already connected to
 an inferior process.
 
 ## qGetWorkingDir
@@ -249,8 +231,6 @@ an inferior process.
 Get the current working directory of the platform stub in
 ASCII hex encoding.
 
-### Example
-
 ```
 receive: qGetWorkingDir
 send:    2f4170706c65496e7465726e616c2f6c6c64622f73657474696e67732f342f5465737453657474696e67732e746573745f646973617373656d626c65725f73657474696e6773
@@ -270,10 +250,8 @@ read packet: OK
 ```
 This packet must be sent  _prior_ to sending a "A" packet.
 
-### Priority To Implement
-
-Low. Only needed if the remote target wants to launch a target after
-making a connection to a GDB server that isn't already connected to
+**Priority To Implement:** Low. Only needed if the remote target wants to launch
+a target after making a connection to a GDB server that isn't already connected to
 an inferior process and if the target supports disabling ASLR
 (Address space layout randomization).
 
@@ -283,26 +261,27 @@ Enable the `threads:` and `thread-pcs:` data in the question-mark packet
 ("T packet") responses when the stub reports that a program has
 stopped executing.
 
-### Priority To Implement
-
-Performance.  This is a performance benefit to lldb if the thread id's
-and thread pc values are provided to lldb in the T stop packet -- if
-they are not provided to lldb, lldb will likely need to send one to
-two packets per thread to fetch the data at every private stop.
-
-### Example
-
 ```
 send packet: QListThreadsInStopReply
 read packet: OK
 ```
 
+**Priority To Implement:** Performance.  This is a performance benefit to lldb
+if the thread id's and thread pc values are provided to lldb in the T stop packet
+-- if they are not provided to lldb, lldb will likely need to send one to
+two packets per thread to fetch the data at every private stop.
+
 ## jLLDBTraceSupported
 
 Get the processor tracing type supported by the gdb-server for the current
 inferior. Responses might be different depending on the architecture and
 capabilities of the underlying OS.
 
+```
+send packet: jLLDBTraceSupported
+read packet: {"name":<name>, "description":<description>}/E<error code>;AAAAAAAAA
+```
+
 ### Output Schema
 
 ```
@@ -317,19 +296,10 @@ capabilities of the underlying OS.
 If no tracing technology is supported for the inferior, or no process is
 running, then an error message is returned.
 
-### Note
-
-This packet is used by Trace plug-ins (see `lldb_private::Trace.h`) to
+**Note:** This packet is used by Trace plug-ins (see `lldb_private::Trace.h`) to
 do live tracing. Specifically, the name of the plug-in should match the name
 of the tracing technology returned by this packet.
 
-### Example
-
-```
-send packet: jLLDBTraceSupported
-read packet: {"name":<name>, "description":<description>}/E<error code>;AAAAAAAAA
-```
-
 ## jLLDBTraceStart
 
 Start tracing a process or its threads using a provided tracing technology.
@@ -341,10 +311,20 @@ response is returned, or an error otherwise.
 This traces existing and future threads of the current process. An error is
 returned if the process is already being traced.
 
+```
+send packet: jLLDBTraceStart:{"type":<type>,...other params}]
+read packet: OK/E<error code>;AAAAAAAAA
+```
+
 ### Thread Tracing
 
 This traces specific threads.
 
+```
+send packet: jLLDBTraceStart:{"type":<type>,"tids":<tids>,...other params}]
+read packet: OK/E<error code>;AAAAAAAAA
+```
+
 ### Input Schema
 
 ```
@@ -360,8 +340,7 @@ This traces specific threads.
 }
 ```
 
-### Notes
-
+**Notes:**
 - If "tids" is not provided, then the operation is "process tracing",
   otherwise it's "thread tracing".
 - Each tracing technology can have different levels of support for "thread
@@ -468,20 +447,6 @@ Notes:
  - If "thread tracing" is attempted on a thread already being traced with
    either "thread tracing" or "process tracing", it fails.
 
-### Examples
-
-Process tracing:
-```
-send packet: jLLDBTraceStart:{"type":<type>,...other params}]
-read packet: OK/E<error code>;AAAAAAAAA
-```
-
-Thread tracing:
-```
-send packet: jLLDBTraceStart:{"type":<type>,"tids":<tids>,...other params}]
-read packet: OK/E<error code>;AAAAAAAAA
-```
-
 ## jLLDBTraceStop
 
 Stop tracing a process or its threads using a provided tracing technology.
@@ -493,11 +458,21 @@ response is returned, or an error otherwise.
 Stopping a process trace stops the active traces initiated with
 "thread tracing".
 
+```
+send packet: jLLDBTraceStop:{"type":<type>}]
+read packet: OK/E<error code>;AAAAAAAAA
+```
+
 ### Thread Trace Stopping
 
 This is a best effort request, which tries to stop as many traces as
 possible.
 
+```
+send packet: jLLDBTraceStop:{"type":<type>,"tids":<tids>}]
+read packet: OK/E<error code>;AAAAAAAAA
+```
+
 ### Input Schema
 
 The schema for the input is
@@ -512,27 +487,12 @@ The schema for the input is
 }
 ```
 
-### Notes
-
-- If "tids" is not provided, then the operation is "process trace stopping".
+**Note:** If `tids` is not provided, then the operation is "process trace stopping".
 
 ### Intel Pt
 
 Stopping a specific thread trace started with "process tracing" is allowed.
 
-### Examples
-
-Process trace stopping:
-```
-send packet: jLLDBTraceStop:{"type":<type>}]
-read packet: OK/E<error code>;AAAAAAAAA
-```
-Thread trace stopping:
-```
-send packet: jLLDBTraceStop:{"type":<type>,"tids":<tids>}]
-read packet: OK/E<error code>;AAAAAAAAA
-```
-
 ## jLLDBTraceGetState
 
 Get the current state of the process and its threads being traced by
@@ -540,6 +500,11 @@ a given trace technology. The response is a JSON object with custom
 information depending on the trace technology. In case of errors, an
 error message is returned.
 
+```
+send packet: jLLDBTraceGetState:{"type":<type>}]
+read packet: {...object}/E<error code>;AAAAAAAAA
+```
+
 ### Input Schema
 
 ```
@@ -594,10 +559,8 @@ error message is returned.
 }
 ```
 
-### Notes
-
- - "traceThreads" includes all thread traced by both "process tracing" and
-   "thread tracing".
+**Note:** `tracedThreads` includes all threads traced by both "process tracing"
+and "thread tracing".
 
 ### Intel Pt
 
@@ -631,13 +594,6 @@ Additional attributes:
       }
       ```
 
-### Example
-
-```
-send packet: jLLDBTraceGetState:{"type":<type>}]
-read packet: {...object}/E<error code>;AAAAAAAAA
-```
-
 ## jLLDBTraceGetBinaryData
 
 Get binary data given a trace technology and a data identifier.
@@ -645,6 +601,11 @@ The input is specified as a JSON object and the response has the same format
 as the "binary memory read" (aka "x") packet. In case of failures, an error
 message is returned.
 
+```
+send packet: jLLDBTraceGetBinaryData:{"type":<type>,"kind":<query>,"tid":<tid>,"offset":<offset>,"size":<size>}]
+read packet: <binary data>/E<error code>;AAAAAAAAA
+```
+
 ### Schema
 
 The schema for the input is:
@@ -661,13 +622,6 @@ The schema for the input is:
 }
 ```
 
-### Example
-
-```
-send packet: jLLDBTraceGetBinaryData:{"type":<type>,"kind":<query>,"tid":<tid>,"offset":<offset>,"size":<size>}]
-read packet: <binary data>/E<error code>;AAAAAAAAA
-```
-
 ## qRegisterInfo\<hex-reg-id\>
 
 Discover register information from the remote GDB server.
@@ -937,16 +891,13 @@ The keys and values are detailed below:
   modifying the CPSR register can cause the r8 - r14 and cpsr value to
   change depending on if the mode has changed.
 
-### Priority To Implement
-
-High. Any target that can self describe its registers, should do so.
-This means if new registers are ever added to a remote target, they
+**Priority To Implement:** High. Any target that can self describe its registers,
+should do so. This means if new registers are ever added to a remote target, they
 will get picked up automatically, and allows registers to change
 depending on the actual CPU type that is used.
 
-NB: `qRegisterInfo` is deprecated in favor of the standard gdb remote
-serial protocol register description method,
-`qXfer:features:read:target.xml`.
+**Note:** `qRegisterInfo` is deprecated in favor of the standard gdb remote
+serial protocol register description method, `qXfer:features:read:target.xml`.
 If `qXfer:features:read:target.xml` is supported, `qRegisterInfo` does
 not need to be implemented.  The target.xml format is used by most
 gdb RSP stubs whereas `qRegisterInfo` was an lldb-only design.
@@ -977,10 +928,7 @@ drwxrwxr-x  5 username groupname    4096 Aug 15 21:36 source.cpp
 -rw-r--r--  1 username groupname    3190 Aug 12 16:46 Makefile
 ```
 
-### Priority To Implement
-
-High. This command allows LLDB clients to run arbitrary shell
-commands on a remote host.
+**Priority To Implement:** High
 
 ## qPlatform_mkdir
 
@@ -997,10 +945,7 @@ Reply:
     (mkdir called successfully and returned with the given return code)
   * `Exx` (An error occurred)
 
-### Priority To Implement
-
-Low. This command allows LLDB clients to create new directories on
-a remote host.
+**Priority To Implement:** Low
 
 ## vFile:chmod / qPlatform_chmod
 
@@ -1013,9 +958,7 @@ Reply:
   (chmod called successfully and returned with the given return code)
 * `Exx` (An error occurred)
 
-### Priority To Implement
-
-Low.
+**Priority To Implement:** Low
 
 ## qHostInfo
 
@@ -1066,10 +1009,8 @@ Key value pairs are one of:
   AArch64 can have different page table setups for low and high
   memory, and therefore a different number of bits used for addressing.
 
-### Priority To Implement
-
-High. This packet is usually very easy to implement and can help
-LLDB select the correct plug-ins for the job based on the target
+**Priority To Implement:** High. This packet is usually very easy to implement
+and can help LLDB select the correct plug-ins for the job based on the target
 triple information that is supplied.
 
 ## qGDBServerVersion
@@ -1107,19 +1048,17 @@ Suggested key names:
 * `major_version`: major version number
 * `minor_version`: minor version number
 
-### Priority To Implement
-
-High. This packet is usually very easy to implement and can help
-LLDB to work around bugs in a server's implementation when they
+**Priority To Implement:** High. This packet is usually very easy to implement
+and can help LLDB to work around bugs in a server's implementation when they
 are found.
 
 ## qProcessInfo
 
 Get information about the process we are currently debugging.
 
-### Priority To Implement
+**Priority To Implement:** Medium
 
-Medium.  On systems which can launch multiple different architecture processes,
+On systems which can launch multiple different architecture processes,
 the qHostInfo may not disambiguate sufficiently to know what kind of
 process is being debugged.
 
@@ -1180,9 +1119,9 @@ send packet: $qShlibInfoAddr#00
 read packet: $7fff5fc40040#00
 ```
 
-### Priority To Implement
+**Priority To Implement:** High
 
-High if you have a dynamic loader plug-in in LLDB for your target
+If you have a dynamic loader plug-in in LLDB for your target
 triple (see the "qHostInfo" packet) that can use this information.
 Many times address load randomization can make it hard to detect
 where the dynamic loader binary and data structures are located and
@@ -1203,9 +1142,9 @@ remote packets love to think that there is only _one_ reason that _one_ thread
 stops at a time. This allows us to see why all threads stopped and allows us
 to implement better multi-threaded debugging support.
 
-### Priority To Implement
+**Priority To Implement:** High
 
-High if you need to support multi-threaded or multi-core debugging.
+If you need to support multi-threaded or multi-core debugging.
 Many times one thread will hit a breakpoint and while the debugger
 is in the process of suspending the other threads, other threads
 will also hit a breakpoint. This packet allows LLDB to know why all
@@ -1254,9 +1193,9 @@ read packet: ....
 We also added support for allocating and deallocating memory. We use this to
 allocate memory so we can run JITed code.
 
-### Priority To Implement
+**Priority To Implement:** High
 
-High. Adding a thread suffix allows us to read and write registers
+Adding a thread suffix allows us to read and write registers
 more efficiently and stops us from having to select a thread with
 one packet and then read registers with a second packet. It also
 makes sure that no errors can occur where the debugger thinks it
@@ -1289,12 +1228,9 @@ You request a size and give the permissions. This packet does NOT need to be
 implemented if you don't want to support running JITed code. The return value
 is just the address of the newly allocated memory as raw big endian hex bytes.
 
-### Priority To Implement
-
-High if you want LLDB to be able to JIT code and run that code. JIT
-code also needs data which is also allocated and tracked.
-
-Low if you don't support running JIT'ed code.
+**Priority To Implement:** High if you want LLDB to be able to JIT code and run
+that code. JIT code also needs data which is also allocated and tracked. Low if
+you don't support running JIT'ed code.
 
 ## _m\<addr\>
 
@@ -1306,12 +1242,9 @@ got back from a previous call to the allocate memory packet. It returns `OK`
 if the memory was successfully deallocated, or `EXX`" for an error, or an
 empty response if not supported.
 
-### Priority To Implement
-
-High if you want LLDB to be able to JIT code and run that code. JIT
-code also needs data which is also allocated and tracked.
-
-Low if you don't support running JIT'ed code.
+**Priority To Implement:** High if you want LLDB to be able to JIT code and run
+that code. JIT code also needs data which is also allocated and tracked. Low if
+you don't support running JIT'ed code.
 
 ## qMemoryRegionInfo:\<addr\>
 
@@ -1371,9 +1304,9 @@ For instance, with a macOS process which has nothing mapped in the first
 The lack of `permissions:` indicates that none of read/write/execute are valid
 for this region.
 
-### Priority To Implement
+**Priority To Implement:** Medium
 
-Medium. This is nice to have, but it isn't necessary. It helps LLDB
+This is nice to have, but it isn't necessary. It helps LLDB
 do stack unwinding when we branch into memory that isn't executable.
 If we can detect that the code we are stopped in isn't executable,
 then we can recover registers for stack frames above the current
@@ -1446,8 +1379,7 @@ involves calling `thread_abort_safely(mach_port_t thread)` to
 ensure we get the correct registers for a thread in case it is
 currently having code run on its behalf in the kernel.
 
-### Response
-
+The response is either:
 * `<unsigned int>` - The save_id result is a non-zero unsigned integer value
                  that can be passed back to the GDB server using a
                  `QRestoreRegisterState` packet to restore the registers
@@ -1455,11 +1387,9 @@ currently having code run on its behalf in the kernel.
 * `EXX` - or an error code in the form of `EXX` where `XX` is a
           hex error code.
 
-### Priority To Implement
-
-Low, this is mostly a convenience packet to avoid having to send all
-registers with a `g` packet. It should only be implemented if support
-for the `QRestoreRegisterState` is added.
+**Priority To Implement:** Low, this is mostly a convenience packet to avoid
+having to send all registers with a `g` packet. It should only be implemented if
+support for the `QRestoreRegisterState` is added.
 
 ## QRestoreRegisterState:\<save_id\> / QRestoreRegisterState:\<save_id\>;thread:XXXX;
 
@@ -1473,16 +1403,13 @@ completion of the `QRestoreRegisterState` command.
 If thread suffixes are enabled the second form of this packet is
 used, otherwise the first form is used.
 
-### Response
-
+The response is either:
 * `OK` - if all registers were successfully restored
 * `EXX` - for any errors
 
-### Priority To Implement
-
-Low, this is mostly a convenience packet to avoid having to send all
-registers with a `g` packet. It should only be implemented if support
-for the `QSaveRegisterState` is added.
+**Priority To Implement:** Low, this is mostly a convenience packet to avoid
+having to send all registers with a `g` packet. It should only be implemented
+if support for the `QSaveRegisterState` is added.
 
 ## qFileLoadAddress:\<file_path\>
 
@@ -1490,39 +1417,31 @@ Get the load address of a memory mapped file.
 The load address is defined as the address of the first memory
 region what contains data mapped from the specified file.
 
-### Response
-
+The response is either:
 * `<unsigned-hex64>` - Load address of the file in big endian encoding
 * `E01` - the requested file isn't loaded
 * `EXX` - for any other errors
 
-### Priority To Implement
-
-Low, required if dynamic linker don't fill in the load address of
-some object file in the rendezvous data structure.
+**Priority To Implement:** Low, required if dynamic linker don't fill in the load
+address of some object file in the rendezvous data structure.
 
 ## qModuleInfo:\<module_path\>;\<arch triple\>
 
 Get information for a module by given module path and architecture.
 
-### Response
-
+The response is either:
 * `(uuid|md5):...;triple:...;file_offset:...;file_size...;`
 * `EXX` - for any errors
 
-### Priority To Implement
-
-Optional, required if dynamic loader cannot fetch module's information like
-UUID directly from inferior's memory.
+**Priority To Implement:** Optional, required if dynamic loader cannot fetch
+module's information like UUID directly from inferior's memory.
 
 ## jModulesInfo:[{"file":"...",triple:"..."}, ...]
 
 Get information for a list of modules by given module path and
 architecture.
 
-### Response
-
-A JSON array of dictionaries containing the following keys:
+The response is a JSON array of dictionaries containing the following keys:
 * `uuid`
 * `triple`
 * `file_path`
@@ -1535,11 +1454,9 @@ corresponding array entry from the response. The server may also
 include entries the client did not ask for, if it has reason to
 the modules will be interesting to the client.
 
-### Priority To Implement
-
-Optional. If not implemented, `qModuleInfo` packet will be used, which
-may be slower if the target contains a large number of modules and
-the communication link has a non-negligible latency.
+**Priority To Implement:** Optional. If not implemented, `qModuleInfo` packet
+will be used, which may be slower if the target contains a large number of modules
+and the communication link has a non-negligible latency.
 
 ## Stop reply packet extensions
 
@@ -1748,10 +1665,8 @@ program correctly. What if a real SIGTRAP was delivered to a thread
 while we were trying to single step? We wouldn't know the difference
 with a standard GDB remote server and we could do the wrong thing.
 
-### Priority To Implement
-
-High. Having the extra information in your stop reply packets makes
-your debug session more reliable and informative.
+**Priority To Implement:** High. Having the extra information in your stop reply
+packets makes your debug session more reliable and informative.
 
 ## qfProcessInfo / qsProcessInfo (Platform Extension)
 
@@ -1804,17 +1719,13 @@ send packet: $qsProcessInfo#00
 read packet: $E04#00
 ```
 
-### Priority To Implement
-
-Required.
+**Priority To Implement:** Required
 
 ## qPathComplete (Platform Extension)
 
 Get a list of matched disk files/directories by passing a boolean flag
 and a partial path.
 
-### Example
-
 ```
 receive: qPathComplete:0,6d61696e
 send:    M6d61696e2e637070
@@ -1833,8 +1744,6 @@ Paths denoting a directory should end with a directory separator (`/` or `\`.
 
 Kill a process running on the target system.
 
-### Example
-
 ```
 receive: qKillSpawnedProcess:1337
 send:    OK
@@ -1866,9 +1775,7 @@ process was separately launched.
 The `port` key/value pair in the response lets clients know what port number
 to attach to in case zero was specified as the "port" in the sent command.
 
-### Priority To Implement
-
-Required.
+**Priority To Implement:** Required
 
 ## qProcessInfoPID:PID (Platform Extension)
 
@@ -1894,40 +1801,32 @@ send packet: $qProcessInfoPID:60050#00
 read packet: $pid:60050;ppid:59948;uid:7746;gid:11;euid:7746;egid:11;name:6c6c6462;triple:x86_64-apple-macosx;#00
 ```
 
-### Priority To Implement
-
-Optional.
+**Priority To Implement:** Optional
 
 ## vAttachName
 
 Same as `vAttach`, except instead of a `pid` you send a process name.
 
-### Priority To Implement
-
-Low. Only needed for `process attach -n`.  If the packet isn't supported
-then `process attach -n` will fail gracefully.  So you need only to support
-it if attaching to a process by name makes sense for your environment.
+**Priority To Implement:** Low. Only needed for `process attach -n`. If the
+packet isn't supported then `process attach -n` will fail gracefully. So you need
+only to support it if attaching to a process by name makes sense for your environment.
 
 ## vAttachWait
 
 Same as `vAttachName`, except that the stub should wait for the next instance
 of a process by that name to be launched and attach to that.
 
-### Priority To Implement
-
-Low. Only needed to support `process attach -w -n` which will fail
-gracefully if the packet is not supported.
+**Priority To Implement:** Low. Only needed to support `process attach -w -n`
+which will fail gracefully if the packet is not supported.
 
 ## qAttachOrWaitSupported
 
 This is a binary "is it supported" query. Return OK if you support
 `vAttachOrWait`.
 
-### Priority To Implement
-
-Low. This is required if you support `vAttachOrWait`, otherwise no support
-is needed since the standard "I don't recognize this packet" response
-will do the right thing.
+**Priority To Implement:** Low. This is required if you support `vAttachOrWait`,
+otherwise no support is needed since the standard "I don't recognize this packet"
+response will do the right thing.
 
 ## vAttachOrWait
 
@@ -1935,9 +1834,9 @@ Same as `vAttachWait`, except that the stub will attach to a process
 by name if it exists, and if it does not, it will wait for a process
 of that name to appear and attach to it.
 
-### Priority To Implement
+**Priority To Implement:** Low
 
-Low. Only needed to implement `process attach -w -i false -n`.  If
+Only needed to implement `process attach -w -i false -n`.  If
 you don't implement it but do implement `-n` AND lldb can somehow get
 a process list from your device, it will fall back on scanning the
 process list, and sending `vAttach` or `vAttachWait` depending on
@@ -1998,11 +1897,8 @@ like:
 jThreadExtendedInfo:{"thread":612910}]
 ```
 
-### Priority To Implement
-
-Low.  This packet is only needed if the gdb remote stub wants to
-provide interesting additional information about a thread for the
-user.
+**Priority To Implement:** Low. This packet is only needed if the gdb remote stub
+wants to provide interesting additional information about a thread for the user.
 
 ## QEnableCompression
 
@@ -2146,7 +2042,7 @@ information is read.  Also the XML DTD would need to be extended
 quite a bit to provide all the information that the `DynamicLoaderMacOSX`
 would need to work correctly on this platform.
 
-### Priority To Implement
+**Priority To Implement:**
 
 On OS X 10.11, iOS 9, tvOS 9, watchOS 2 and older: Low.  If this packet is absent,
 lldb will read the Mach-O headers/load commands out of memory.
@@ -2212,9 +2108,9 @@ On macOS with debugserver, we expedite the frame pointer backchain for a thread
 the previous FP and PC), and follow the backchain. Most backtraces on macOS and
 iOS now don't require us to read any memory!
 
-### Priority To Implement
+**Priority To Implement:** Low
 
-Low. This is a performance optimization, which speeds up debugging by avoiding
+This is a performance optimization, which speeds up debugging by avoiding
 multiple round-trips for retrieving thread information. The information from this
 packet can be retrieved using a combination of `qThreadStopInfo` and `m` packets.
 
@@ -2230,9 +2126,9 @@ LLDB SENDS: jGetSharedCacheInfo:{}
 STUB REPLIES: ${"shared_cache_base_address":140735683125248,"shared_cache_uuid":"DDB8D70C-C9A2-3561-B2C8-BE48A4F33F96","no_shared_cache":false,"shared_cache_private_cache":false]}#00
 ```
 
-### Priority To Implement
+**Priority To Implement:** Low
 
-Low.  When both lldb and the inferior process are running on the same computer, and lldb
+When both lldb and the inferior process are running on the same computer, and lldb
 and the inferior process have the same shared cache, lldb may (as an optimization) read
 the shared cache out of its own memory instead of using gdb-remote read packets to read
 them from the inferior process.
@@ -2258,9 +2154,9 @@ Example packet:
 ]
 ```
 
-### Priority To Implement
+**Priority To Implement:** Low
 
-Low. The packet is required to support connecting to gdbserver started
+The packet is required to support connecting to gdbserver started
 by the platform instance automatically.
 
 ## QSetDetachOnError
@@ -2273,10 +2169,8 @@ The data in this packet is a single a character, which should be `0` if the
 inferior process should be killed, or `1` if the server should remove all
 breakpoints and detach from the inferior.
 
-### Priority To Implement
-
-Low. Only required if the target wants to keep the inferior process alive
-when the communication channel goes down.
+**Priority To Implement:** Low. Only required if the target wants to keep the
+inferior process alive when the communication channel goes down.
 
 ## jGetDyldProcessState
 
@@ -2289,10 +2183,8 @@ LLDB SENDS: jGetDyldProcessState
 STUB REPLIES: {"process_state_value":48,"process_state string":"dyld_process_state_libSystem_initialized"}
 ```
 
-### Priority To Implement
-
-Low. This packet is needed to prevent lldb's utility functions for
-scanning the Objective-C class list from running very early in
+**Priority To Implement:** Low. This packet is needed to prevent lldb's utility
+functions for scanning the Objective-C class list from running very early in
 process startup.
 
 ## vFile Packets
@@ -2305,8 +2197,6 @@ mismatches or extensions.
 
 Get the size of a file on the target system, filename in ASCII hex.
 
-#### Example
-
 ```
 receive: vFile:size:2f746d702f61
 send:    Fc008
@@ -2319,8 +2209,6 @@ response is `F` followed by the file size in base 16.
 
 Get the mode bits of a file on the target system, filename in ASCII hex.
 
-#### Example
-
 ```
 receive: vFile:mode:2f746d702f61
 send:    F1ed
@@ -2334,8 +2222,6 @@ correspond to `0755` in octal.
 
 Remove a file on the target system.
 
-#### Example
-
 ```
 receive: vFile:unlink:2f746d702f61
 send:    F0
@@ -2350,8 +2236,6 @@ value of errno if unlink failed.
 
 Create a symbolic link (symlink, soft-link) on the target system.
 
-#### Example
-
 ```
 receive: vFile:symlink:<SRC-FILE>,<DST-NAME>
 send:    F0,0
@@ -2365,8 +2249,6 @@ optionally followed by the value of errno if it failed, also base 16.
 
 Open a file on the remote system and return the file descriptor of it.
 
-#### Example
-
 ```
 receive: vFile:open:2f746d702f61,00000001,00000180
 send:    F8
@@ -2387,8 +2269,6 @@ response is `F` followed by the opened file descriptor in base 16.
 
 Close a previously opened file descriptor.
 
-#### Example
-
 ```
 receive: vFile:close:7
 send:    F0
@@ -2401,8 +2281,6 @@ errno is base 16.
 
 Read data from an opened file descriptor.
 
-#### Example
-
 ```
 receive: vFile:pread:7,1024,0
 send:    F4;a'b\00
@@ -2420,8 +2298,6 @@ semicolon, followed by the data in the binary-escaped-data encoding.
 
 Write data to a previously opened file descriptor.
 
-#### Example
-
 ```
 receive: vFile:pwrite:8,0,\cf\fa\ed\fe\0c\00\00
 send:    F1024
@@ -2438,8 +2314,6 @@ Response is `F`, followed by the number of bytes written (base 16).
 
 Generate an MD5 hash of the file at the given path.
 
-#### Example
-
 ```
 receive: vFile:MD5:2f746d702f61
 send (success): F,00000000000000001111111111111111
@@ -2459,8 +2333,6 @@ or failed to hash.
 
 Check whether the file at the given path exists.
 
-#### Example
-
 ```
 receive: vFile:exists:2f746d702f61
 send         (exists): F,1

From eb148aecb3603c2ba6ecbdaebd3b8a87f44349bc Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Tue, 30 Apr 2024 15:14:44 +0800
Subject: [PATCH 34/65] [LoongArch][Codegen] Add support for TLSDESC

The implementation only enables when the `-enable-tlsdesc` option is
passed and the TLS model is `dynamic`.

LoongArch's GCC has the same option(-mtls-dialet=) as RISC-V.

Reviewers: heiher, MaskRay, SixWeining

Reviewed By: SixWeining, MaskRay

Pull Request: https://github.com/llvm/llvm-project/pull/90159
---
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |   2 +-
 clang/test/CodeGen/LoongArch/tls-dialect.c    |  14 ++
 clang/test/Driver/tls-dialect.c               |   5 +
 .../LoongArch/LoongArchExpandPseudoInsts.cpp  | 108 ++++++++++++++
 .../LoongArch/LoongArchISelLowering.cpp       |  50 +++++--
 .../Target/LoongArch/LoongArchISelLowering.h  |   2 +
 .../Target/LoongArch/LoongArchInstrInfo.cpp   |   6 +
 .../Target/LoongArch/LoongArchInstrInfo.td    |   7 +
 .../Target/LoongArch/LoongArchMCInstLower.cpp |  18 +++
 .../MCTargetDesc/LoongArchBaseInfo.h          |   8 +-
 llvm/test/CodeGen/LoongArch/tls-models.ll     | 139 ++++++++++++++++++
 11 files changed, 343 insertions(+), 16 deletions(-)
 create mode 100644 clang/test/CodeGen/LoongArch/tls-dialect.c

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index fec11c7e716fdf..6796b43a155020 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -737,7 +737,7 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
   StringRef V = A->getValue();
   bool SupportedArgument = false, EnableTLSDESC = false;
   bool Unsupported = !Triple.isOSBinFormatELF();
-  if (Triple.isRISCV()) {
+  if (Triple.isLoongArch() || Triple.isRISCV()) {
     SupportedArgument = V == "desc" || V == "trad";
     EnableTLSDESC = V == "desc";
   } else if (Triple.isX86()) {
diff --git a/clang/test/CodeGen/LoongArch/tls-dialect.c b/clang/test/CodeGen/LoongArch/tls-dialect.c
new file mode 100644
index 00000000000000..03401ef8af03d4
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/tls-dialect.c
@@ -0,0 +1,14 @@
+// REQUIRES: loongarch-registered-target
+/// cc1 -enable-tlsdesc (due to -mtls-dialect=desc) enables TLSDESC.
+// RUN: %clang_cc1 -triple loongarch64 -S -mrelocation-model pic -pic-level 1 -enable-tlsdesc %s -o - | FileCheck %s --check-prefix=DESC
+// RUN: %clang_cc1 -triple loongarch64 -S -mrelocation-model pic -pic-level 1 %s -o - | FileCheck %s --check-prefix=NODESC
+
+__thread int x;
+
+// DESC:       %desc_pc_hi20
+// DESC-NOT:   %gd_pc_hi20
+// NODESC:     %gd_pc_hi20
+// NODESC-NOT: %desc_pc_hi20
+int use() {
+  return x;
+}
diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c
index a808dd81531ce7..3471b55b0ebae9 100644
--- a/clang/test/Driver/tls-dialect.c
+++ b/clang/test/Driver/tls-dialect.c
@@ -1,3 +1,5 @@
+// RUN: %clang -### --target=loongarch64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
+// RUN: %clang -### --target=loongarch64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
 // RUN: %clang -### --target=riscv64-freebsd -mtls-dialect=desc %s 2>&1 | FileCheck --check-prefix=DESC %s
 // RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
 // RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
@@ -9,6 +11,8 @@
 // RUN: %clang -### --target=riscv64-android %s 2>&1 | FileCheck --check-prefix=DESC %s
 
 /// LTO
+// RUN: %clang -### --target=loongarch64-linux -flto -mtls-dialect=desc %s 2>&1 | FileCheck --check-prefix=LTO-DESC %s
+// RUN: %clang -### --target=loongarch64-linux -flto %s 2>&1 | FileCheck --check-prefix=LTO-NODESC %s
 // RUN: %clang -### --target=riscv64-linux -flto -mtls-dialect=desc %s 2>&1 | FileCheck --check-prefix=LTO-DESC %s
 // RUN: %clang -### --target=riscv64-linux -flto %s 2>&1 | FileCheck --check-prefix=LTO-NODESC %s
 
@@ -18,6 +22,7 @@
 // RUN: not %clang --target=x86_64-apple-macos -mtls-dialect=desc -flto %s 2>&1 | FileCheck -check-prefix=UNSUPPORTED-TARGET %s
 
 /// Unsupported argument
+// RUN: not %clang -### --target=loongarch64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
 // RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
 
 // DESC:       "-cc1" {{.*}}"-enable-tlsdesc"
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index ad39658f698e7b..c136f5b3e515d7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -80,6 +80,9 @@ class LoongArchPreRAExpandPseudo : public MachineFunctionPass {
   bool expandLoadAddressTLSGD(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddressTLSDesc(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NextMBBI);
 };
 
 char LoongArchPreRAExpandPseudo::ID = 0;
@@ -122,6 +125,8 @@ bool LoongArchPreRAExpandPseudo::expandMI(
     return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI);
   case LoongArch::PseudoLA_TLS_GD:
     return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_DESC_PC:
+    return expandLoadAddressTLSDesc(MBB, MBBI, NextMBBI);
   }
   return false;
 }
@@ -267,6 +272,52 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
                                  SecondOpcode, LoongArchII::MO_GOT_PC_LO);
 }
 
+bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  // Code Sequence:
+  // pcalau12i $a0, %desc_pc_hi20(sym)
+  // addi.w/d  $a0, $a0, %desc_pc_lo12(sym)
+  // ld.w/d    $ra, $a0, %desc_ld(sym)
+  // jirl      $ra, $ra, %desc_ld(sym)
+  // add.d     $dst, $a0, $tp
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  const auto &STI = MF->getSubtarget<LoongArchSubtarget>();
+  unsigned ADD = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+  unsigned ADDI = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+  unsigned LD = STI.is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
+
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg =
+      MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+  MachineOperand &Symbol = MI.getOperand(1);
+
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_HI);
+
+  BuildMI(MBB, MBBI, DL, TII->get(ADDI), LoongArch::R4)
+      .addReg(ScratchReg)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
+
+  BuildMI(MBB, MBBI, DL, TII->get(LD), LoongArch::R1)
+      .addReg(LoongArch::R4)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_LD);
+
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoDESC_CALL), LoongArch::R1)
+      .addReg(LoongArch::R1)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_CALL);
+
+  BuildMI(MBB, MBBI, DL, TII->get(ADD), DestReg)
+      .addReg(LoongArch::R4)
+      .addReg(LoongArch::R2);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 class LoongArchExpandPseudo : public MachineFunctionPass {
 public:
   const LoongArchInstrInfo *TII;
@@ -313,6 +364,9 @@ class LoongArchExpandPseudo : public MachineFunctionPass {
   bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddressTLSDescPcLarge(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       MachineBasicBlock::iterator &NextMBBI);
   bool expandFunctionCALL(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI,
@@ -361,6 +415,8 @@ bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
     return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI);
   case LoongArch::PseudoLA_TLS_GD_LARGE:
     return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_DESC_PC_LARGE:
+    return expandLoadAddressTLSDescPcLarge(MBB, MBBI, NextMBBI);
   case LoongArch::PseudoCALL:
   case LoongArch::PseudoCALL_MEDIUM:
   case LoongArch::PseudoCALL_LARGE:
@@ -560,6 +616,58 @@ bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge(
                                 LoongArchII::MO_GD_PC_HI);
 }
 
+bool LoongArchExpandPseudo::expandLoadAddressTLSDescPcLarge(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  // Code Sequence:
+  //
+  // pcalau12i  $a0, %desc_pc_hi20(sym)
+  // addi.d     $t8, $zero, %desc_pc_lo12(sym)
+  // lu32i.d    $t8, %desc64_pc_lo20(sym)
+  // lu52i.d    $t8, $t8, %desc64_pc_hi12(sym)
+  // add.d      $a0, $a0, $t8
+  // ld.d       $ra, $a0, %desc_ld(sym)
+  // jirl       $ra, $ra, %desc_call(sym)
+  // add.d      $dst, $a0, $tp
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  Register DestReg = MI.getOperand(0).getReg();
+  MachineOperand &Symbol = MI.getOperand(2);
+  Register ScratchReg = LoongArch::R20; // $t8
+
+  assert(MBB.getParent()->getSubtarget<LoongArchSubtarget>().is64Bit() &&
+         "Large code model requires LA64");
+
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), LoongArch::R4)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_HI);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg)
+      .addReg(LoongArch::R0)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg)
+      .addReg(ScratchReg)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC64_PC_LO);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg)
+      .addReg(ScratchReg)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC64_PC_HI);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADD_D), LoongArch::R4)
+      .addReg(ScratchReg)
+      .addReg(LoongArch::R4);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LD_D), LoongArch::R1)
+      .addReg(LoongArch::R4)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_LD);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoDESC_CALL), LoongArch::R1)
+      .addReg(LoongArch::R1)
+      .addDisp(Symbol, 0, LoongArchII::MO_DESC_CALL);
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADD_D), DestReg)
+      .addReg(LoongArch::R4)
+      .addReg(LoongArch::R2);
+
+  MI.eraseFromParent();
+
+  return true;
+}
+
 bool LoongArchExpandPseudo::expandFunctionCALL(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 285d5c2a63b2da..e5c80644d4a83e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -903,6 +903,24 @@ SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
   return LowerCallTo(CLI).first;
 }
 
+SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
+                                                SelectionDAG &DAG, unsigned Opc,
+                                                bool Large) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  const GlobalValue *GV = N->getGlobal();
+
+  // This is not actually used, but is necessary for successfully matching the
+  // PseudoLA_*_LARGE nodes.
+  SDValue Tmp = DAG.getConstant(0, DL, Ty);
+
+  // Use a PC-relative addressing mode to access the global dynamic GOT address.
+  // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
+  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+  return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
+               : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+}
+
 SDValue
 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -916,42 +934,46 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
   assert(N->getOffset() == 0 && "unexpected offset in global node");
 
-  SDValue Addr;
+  bool IsDesc = DAG.getTarget().useTLSDESC();
+
   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
   case TLSModel::GeneralDynamic:
     // In this model, application code calls the dynamic linker function
     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
     // runtime.
-    Addr = getDynamicTLSAddr(N, DAG,
-                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
-                                   : LoongArch::PseudoLA_TLS_GD,
-                             Large);
+    if (!IsDesc)
+      return getDynamicTLSAddr(N, DAG,
+                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
+                                     : LoongArch::PseudoLA_TLS_GD,
+                               Large);
     break;
   case TLSModel::LocalDynamic:
     // Same as GeneralDynamic, except for assembly modifiers and relocation
     // records.
-    Addr = getDynamicTLSAddr(N, DAG,
-                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
-                                   : LoongArch::PseudoLA_TLS_LD,
-                             Large);
+    if (!IsDesc)
+      return getDynamicTLSAddr(N, DAG,
+                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
+                                     : LoongArch::PseudoLA_TLS_LD,
+                               Large);
     break;
   case TLSModel::InitialExec:
     // This model uses the GOT to resolve TLS offsets.
-    Addr = getStaticTLSAddr(N, DAG,
+    return getStaticTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                   : LoongArch::PseudoLA_TLS_IE,
                             Large);
-    break;
   case TLSModel::LocalExec:
     // This model is used when static linking as the TLS offsets are resolved
     // during program linking.
     //
     // This node doesn't need an extra argument for the large code model.
-    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
-    break;
+    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
   }
 
-  return Addr;
+  return getTLSDescAddr(N, DAG,
+                        Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
+                              : LoongArch::PseudoLA_TLS_DESC_PC,
+                        Large);
 }
 
 template <unsigned N>
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 4bb6c049f8d758..31b4d651956342 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -263,6 +263,8 @@ class LoongArchTargetLowering : public TargetLowering {
                            unsigned Opc, bool Large = false) const;
   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                             unsigned Opc, bool Large = false) const;
+  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+                         unsigned Opc, bool Large = false) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 6576100d3b3218..babb6632471bbf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -530,6 +530,12 @@ LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
       {MO_IE_PC_LO, "loongarch-ie-pc-lo"},
       {MO_IE_PC64_LO, "loongarch-ie-pc64-lo"},
       {MO_IE_PC64_HI, "loongarch-ie-pc64-hi"},
+      {MO_DESC_PC_HI, "loongarch-desc-pc-hi"},
+      {MO_DESC_PC_LO, "loongarch-desc-pc-lo"},
+      {MO_DESC64_PC_LO, "loongarch-desc64-pc-lo"},
+      {MO_DESC64_PC_HI, "loongarch-desc64-pc-hi"},
+      {MO_DESC_LD, "loongarch-desc-ld"},
+      {MO_DESC_CALL, "loongarch-desc-call"},
       {MO_LD_PC_HI, "loongarch-ld-pc-hi"},
       {MO_GD_PC_HI, "loongarch-gd-pc-hi"}};
   return ArrayRef(TargetFlags);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 958803b52d4ec8..a7f6eb9a79ebc0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1607,6 +1607,13 @@ def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst),
 } // Defs = [R20], Size = 20
 }
 
+// Used for expand PseudoLA_TLS_DESC_* instructions.
+let isCall = 1, isBarrier = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0,
+    Defs = [R4], Uses = [R4] in
+def PseudoDESC_CALL : Pseudo<(outs GPR:$rd), (ins GPR:$rj, simm16_lsl2:$imm16)>,
+                      PseudoInstExpansion<(JIRL GPR:$rd, GPR:$rj,
+                                           simm16_lsl2:$imm16)>;
+
 // TLSDESC
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
     isAsmParserOnly = 1, Defs = [R1] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
index 98ad49f25e3f2f..d1d428241ebcc2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -98,6 +98,24 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
   case LoongArchII::MO_CALL36:
     Kind = LoongArchMCExpr::VK_LoongArch_CALL36;
     break;
+  case LoongArchII::MO_DESC_PC_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_HI20;
+    break;
+  case LoongArchII::MO_DESC_PC_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_LO12;
+    break;
+  case LoongArchII::MO_DESC64_PC_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_LO20;
+    break;
+  case LoongArchII::MO_DESC64_PC_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC64_PC_HI12;
+    break;
+  case LoongArchII::MO_DESC_LD:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC_LD;
+    break;
+  case LoongArchII::MO_DESC_CALL:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_DESC_CALL;
+    break;
     // TODO: Handle more target-flags.
   }
 
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index 0692cb92b69440..3c3fed7d43ed98 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -47,7 +47,13 @@ enum {
   MO_IE_PC64_HI,
   MO_LD_PC_HI,
   MO_GD_PC_HI,
-  MO_CALL36
+  MO_CALL36,
+  MO_DESC_PC_HI,
+  MO_DESC_PC_LO,
+  MO_DESC64_PC_HI,
+  MO_DESC64_PC_LO,
+  MO_DESC_LD,
+  MO_DESC_CALL,
   // TODO: Add more flags.
 };
 } // end namespace LoongArchII
diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll
index 3994df1da7163f..6b250ec021624a 100644
--- a/llvm/test/CodeGen/LoongArch/tls-models.ll
+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll
@@ -5,6 +5,12 @@
 ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32NOPIC
 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64NOPIC
 ; RUN: llc --mtriple=loongarch64 --code-model=large < %s | FileCheck %s --check-prefix=LA64LARGENOPIC
+; RUN: llc --mtriple=loongarch32 --relocation-model=pic --enable-tlsdesc < %s \
+; RUN:     | FileCheck %s --check-prefix=LA32DESC
+; RUN: llc --mtriple=loongarch64 --relocation-model=pic --enable-tlsdesc < %s \
+; RUN:     | FileCheck %s --check-prefix=LA64DESC
+; RUN: llc --mtriple=loongarch64 --relocation-model=pic --enable-tlsdesc \
+; RUN:     --code-model=large < %s | FileCheck %s --check-prefix=DESC64
 
 ;; Check that TLS symbols are lowered correctly based on the specified
 ;; model. Make sure they're external to avoid them all being optimised to Local
@@ -82,6 +88,49 @@ define ptr @f1() nounwind {
 ; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
+;
+; LA32DESC-LABEL: f1:
+; LA32DESC:       # %bb.0: # %entry
+; LA32DESC-NEXT:    addi.w $sp, $sp, -16
+; LA32DESC-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32DESC-NEXT:    pcalau12i $a0, %desc_pc_hi20(unspecified)
+; LA32DESC-NEXT:    addi.w $a0, $a0, %desc_pc_lo12(unspecified)
+; LA32DESC-NEXT:    ld.w $ra, $a0, %desc_ld(unspecified)
+; LA32DESC-NEXT:    jirl $ra, $ra, %desc_call(unspecified)
+; LA32DESC-NEXT:    add.w $a0, $a0, $tp
+; LA32DESC-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32DESC-NEXT:    addi.w $sp, $sp, 16
+; LA32DESC-NEXT:    ret
+;
+; LA64DESC-LABEL: f1:
+; LA64DESC:       # %bb.0: # %entry
+; LA64DESC-NEXT:    addi.d $sp, $sp, -16
+; LA64DESC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64DESC-NEXT:    pcalau12i $a0, %desc_pc_hi20(unspecified)
+; LA64DESC-NEXT:    addi.d $a0, $a0, %desc_pc_lo12(unspecified)
+; LA64DESC-NEXT:    ld.d $ra, $a0, %desc_ld(unspecified)
+; LA64DESC-NEXT:    jirl $ra, $ra, %desc_call(unspecified)
+; LA64DESC-NEXT:    add.d $a0, $a0, $tp
+; LA64DESC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64DESC-NEXT:    addi.d $sp, $sp, 16
+; LA64DESC-NEXT:    ret
+;
+; DESC64-LABEL: f1:
+; DESC64:       # %bb.0: # %entry
+; DESC64-NEXT:    addi.d $sp, $sp, -16
+; DESC64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; DESC64-NEXT:    pcalau12i $a0, %desc_pc_hi20(unspecified)
+; DESC64-NEXT:    addi.d $t8, $zero, %desc_pc_lo12(unspecified)
+; DESC64-NEXT:    lu32i.d $t8, %desc64_pc_lo20(unspecified)
+; DESC64-NEXT:    lu52i.d $t8, $t8, %desc64_pc_hi12(unspecified)
+; DESC64-NEXT:    add.d $a0, $t8, $a0
+; DESC64-NEXT:    ld.d $ra, $a0, %desc_ld(unspecified)
+; DESC64-NEXT:    jirl $ra, $ra, %desc_call(unspecified)
+; DESC64-NEXT:    add.d $a1, $a0, $tp
+; DESC64-NEXT:    move $a0, $a1
+; DESC64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; DESC64-NEXT:    addi.d $sp, $sp, 16
+; DESC64-NEXT:    ret
 entry:
   ret ptr @unspecified
 }
@@ -153,6 +202,49 @@ define ptr @f2() nounwind {
 ; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
+;
+; LA32DESC-LABEL: f2:
+; LA32DESC:       # %bb.0: # %entry
+; LA32DESC-NEXT:    addi.w $sp, $sp, -16
+; LA32DESC-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32DESC-NEXT:    pcalau12i $a0, %desc_pc_hi20(ld)
+; LA32DESC-NEXT:    addi.w $a0, $a0, %desc_pc_lo12(ld)
+; LA32DESC-NEXT:    ld.w $ra, $a0, %desc_ld(ld)
+; LA32DESC-NEXT:    jirl $ra, $ra, %desc_call(ld)
+; LA32DESC-NEXT:    add.w $a0, $a0, $tp
+; LA32DESC-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32DESC-NEXT:    addi.w $sp, $sp, 16
+; LA32DESC-NEXT:    ret
+;
+; LA64DESC-LABEL: f2:
+; LA64DESC:       # %bb.0: # %entry
+; LA64DESC-NEXT:    addi.d $sp, $sp, -16
+; LA64DESC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64DESC-NEXT:    pcalau12i $a0, %desc_pc_hi20(ld)
+; LA64DESC-NEXT:    addi.d $a0, $a0, %desc_pc_lo12(ld)
+; LA64DESC-NEXT:    ld.d $ra, $a0, %desc_ld(ld)
+; LA64DESC-NEXT:    jirl $ra, $ra, %desc_call(ld)
+; LA64DESC-NEXT:    add.d $a0, $a0, $tp
+; LA64DESC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64DESC-NEXT:    addi.d $sp, $sp, 16
+; LA64DESC-NEXT:    ret
+;
+; DESC64-LABEL: f2:
+; DESC64:       # %bb.0: # %entry
+; DESC64-NEXT:    addi.d $sp, $sp, -16
+; DESC64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; DESC64-NEXT:    pcalau12i $a0, %desc_pc_hi20(ld)
+; DESC64-NEXT:    addi.d $t8, $zero, %desc_pc_lo12(ld)
+; DESC64-NEXT:    lu32i.d $t8, %desc64_pc_lo20(ld)
+; DESC64-NEXT:    lu52i.d $t8, $t8, %desc64_pc_hi12(ld)
+; DESC64-NEXT:    add.d $a0, $t8, $a0
+; DESC64-NEXT:    ld.d $ra, $a0, %desc_ld(ld)
+; DESC64-NEXT:    jirl $ra, $ra, %desc_call(ld)
+; DESC64-NEXT:    add.d $a1, $a0, $tp
+; DESC64-NEXT:    move $a0, $a1
+; DESC64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; DESC64-NEXT:    addi.d $sp, $sp, 16
+; DESC64-NEXT:    ret
 entry:
   ret ptr @ld
 }
@@ -207,6 +299,30 @@ define ptr @f3() nounwind {
 ; LA64LARGENOPIC-NEXT:    ldx.d $a0, $t8, $a0
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
+;
+; LA32DESC-LABEL: f3:
+; LA32DESC:       # %bb.0: # %entry
+; LA32DESC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA32DESC-NEXT:    ld.w $a0, $a0, %ie_pc_lo12(ie)
+; LA32DESC-NEXT:    add.w $a0, $a0, $tp
+; LA32DESC-NEXT:    ret
+;
+; LA64DESC-LABEL: f3:
+; LA64DESC:       # %bb.0: # %entry
+; LA64DESC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA64DESC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(ie)
+; LA64DESC-NEXT:    add.d $a0, $a0, $tp
+; LA64DESC-NEXT:    ret
+;
+; DESC64-LABEL: f3:
+; DESC64:       # %bb.0: # %entry
+; DESC64-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; DESC64-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
+; DESC64-NEXT:    lu32i.d $t8, %ie64_pc_lo20(ie)
+; DESC64-NEXT:    lu52i.d $t8, $t8, %ie64_pc_hi12(ie)
+; DESC64-NEXT:    ldx.d $a0, $t8, $a0
+; DESC64-NEXT:    add.d $a0, $a0, $tp
+; DESC64-NEXT:    ret
 entry:
   ret ptr @ie
 }
@@ -259,6 +375,29 @@ define ptr @f4() nounwind {
 ; LA64LARGENOPIC-NEXT:    lu52i.d $a0, $a0, %le64_hi12(le)
 ; LA64LARGENOPIC-NEXT:    add.d $a0, $a0, $tp
 ; LA64LARGENOPIC-NEXT:    ret
+;
+; LA32DESC-LABEL: f4:
+; LA32DESC:       # %bb.0: # %entry
+; LA32DESC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA32DESC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA32DESC-NEXT:    add.w $a0, $a0, $tp
+; LA32DESC-NEXT:    ret
+;
+; LA64DESC-LABEL: f4:
+; LA64DESC:       # %bb.0: # %entry
+; LA64DESC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA64DESC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA64DESC-NEXT:    add.d $a0, $a0, $tp
+; LA64DESC-NEXT:    ret
+;
+; DESC64-LABEL: f4:
+; DESC64:       # %bb.0: # %entry
+; DESC64-NEXT:    lu12i.w $a0, %le_hi20(le)
+; DESC64-NEXT:    ori $a0, $a0, %le_lo12(le)
+; DESC64-NEXT:    lu32i.d $a0, %le64_lo20(le)
+; DESC64-NEXT:    lu52i.d $a0, $a0, %le64_hi12(le)
+; DESC64-NEXT:    add.d $a0, $a0, $tp
+; DESC64-NEXT:    ret
 entry:
   ret ptr @le
 }

From 91a8cb781dbc981356207e0c3608d92ed6d26042 Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer@amd.com>
Date: Tue, 30 Apr 2024 08:25:54 +0100
Subject: [PATCH 35/65] Reapply "[flang] Improve debug info for functions."
 with regression fixed. (#90484)

The original PR #90083 had to be reverted in PR #90444 as it caused one
of the gfortran tests to fail. The issue was using `isIntOrIndex` for
checking for integer type. It allowed index type which later caused
assertion when calling `getIntOrFloatBitWidth`. I have now replaced it
with `isInteger` which should fix this regression.
---
 .../lib/Optimizer/Transforms/AddDebugInfo.cpp | 36 ++++++++---
 flang/lib/Optimizer/Transforms/CMakeLists.txt |  1 +
 .../Transforms/DebugTypeGenerator.cpp         | 63 +++++++++++++++++++
 .../Optimizer/Transforms/DebugTypeGenerator.h | 40 ++++++++++++
 flang/test/Transforms/debug-fn-info.f90       | 43 +++++++++++++
 .../Transforms/debug-line-table-inc-file.fir  |  2 +-
 6 files changed, 174 insertions(+), 11 deletions(-)
 create mode 100644 flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
 create mode 100644 flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
 create mode 100644 flang/test/Transforms/debug-fn-info.f90

diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
index 18d98a11ef3c40..5108469cb6c84a 100644
--- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
+++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
@@ -11,6 +11,7 @@
 /// This pass populates some debug information for the module and functions.
 //===----------------------------------------------------------------------===//
 
+#include "DebugTypeGenerator.h"
 #include "flang/Common/Version.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
@@ -106,14 +107,27 @@ void AddDebugInfoPass::runOnOperation() {
       filePath = llvm::sys::path::parent_path(funcLoc.getFilename().getValue());
     }
 
-    mlir::StringAttr funcName =
+    mlir::StringAttr fullName =
         mlir::StringAttr::get(context, funcOp.getName());
-    mlir::LLVM::DIBasicTypeAttr bT = mlir::LLVM::DIBasicTypeAttr::get(
-        context, llvm::dwarf::DW_TAG_base_type, "void", /*sizeInBits=*/0,
-        /*encoding=*/1);
-    // FIXME: Provide proper type for subroutine
+    auto result = fir::NameUniquer::deconstruct(funcOp.getName());
+    mlir::StringAttr funcName =
+        mlir::StringAttr::get(context, result.second.name);
+
+    llvm::SmallVector<mlir::LLVM::DITypeAttr> types;
+    fir::DebugTypeGenerator typeGen(module);
+    for (auto resTy : funcOp.getResultTypes()) {
+      auto tyAttr =
+          typeGen.convertType(resTy, fileAttr, cuAttr, funcOp.getLoc());
+      types.push_back(tyAttr);
+    }
+    for (auto inTy : funcOp.getArgumentTypes()) {
+      auto tyAttr = typeGen.convertType(fir::unwrapRefType(inTy), fileAttr,
+                                        cuAttr, funcOp.getLoc());
+      types.push_back(tyAttr);
+    }
+
     mlir::LLVM::DISubroutineTypeAttr subTypeAttr =
-        mlir::LLVM::DISubroutineTypeAttr::get(context, CC, {bT, bT});
+        mlir::LLVM::DISubroutineTypeAttr::get(context, CC, types);
     mlir::LLVM::DIFileAttr funcFileAttr =
         mlir::LLVM::DIFileAttr::get(context, fileName, filePath);
 
@@ -130,11 +144,13 @@ void AddDebugInfoPass::runOnOperation() {
       subprogramFlags =
           subprogramFlags | mlir::LLVM::DISubprogramFlags::Definition;
     }
-    // FIXME: Provide proper line and scopeline.
+    unsigned line = 1;
+    if (auto funcLoc = l.dyn_cast<mlir::FileLineColLoc>())
+      line = funcLoc.getLine();
+
     auto spAttr = mlir::LLVM::DISubprogramAttr::get(
-        context, id, compilationUnit, fileAttr, funcName, funcName,
-        funcFileAttr, /*line=*/1, /*scopeline=*/1, subprogramFlags,
-        subTypeAttr);
+        context, id, compilationUnit, fileAttr, funcName, fullName,
+        funcFileAttr, line, line, subprogramFlags, subTypeAttr);
     funcOp->setLoc(builder.getFusedLoc({funcOp->getLoc()}, spAttr));
   });
 }
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index fc08d67540ceb0..5a542f237f8f98 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -22,6 +22,7 @@ add_flang_library(FIRTransforms
   OMPMarkDeclareTarget.cpp
   VScaleAttr.cpp
   FunctionAttr.cpp
+  DebugTypeGenerator.cpp
 
   DEPENDS
   FIRDialect
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
new file mode 100644
index 00000000000000..0f4ebf9507b124
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -0,0 +1,63 @@
+//===-- DebugTypeGenerator.cpp -- type conversion ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "flang-debug-type-generator"
+
+#include "DebugTypeGenerator.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/Debug.h"
+
+namespace fir {
+
+DebugTypeGenerator::DebugTypeGenerator(mlir::ModuleOp m)
+    : module(m), kindMapping(getKindMapping(m)) {
+  LLVM_DEBUG(llvm::dbgs() << "DITypeAttr generator\n");
+}
+
+static mlir::LLVM::DITypeAttr genPlaceholderType(mlir::MLIRContext *context) {
+  return mlir::LLVM::DIBasicTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_base_type, "void", 32, 1);
+}
+
+static mlir::LLVM::DITypeAttr genBasicType(mlir::MLIRContext *context,
+                                           mlir::StringAttr name,
+                                           unsigned bitSize,
+                                           unsigned decoding) {
+  return mlir::LLVM::DIBasicTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_base_type, name, bitSize, decoding);
+}
+
+mlir::LLVM::DITypeAttr
+DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr,
+                                mlir::LLVM::DIScopeAttr scope,
+                                mlir::Location loc) {
+  mlir::MLIRContext *context = module.getContext();
+  if (Ty.isInteger()) {
+    return genBasicType(context, mlir::StringAttr::get(context, "integer"),
+                        Ty.getIntOrFloatBitWidth(), llvm::dwarf::DW_ATE_signed);
+  } else if (Ty.isa<mlir::FloatType>() || Ty.isa<fir::RealType>()) {
+    return genBasicType(context, mlir::StringAttr::get(context, "real"),
+                        Ty.getIntOrFloatBitWidth(), llvm::dwarf::DW_ATE_float);
+  } else if (auto logTy = Ty.dyn_cast_or_null<fir::LogicalType>()) {
+    return genBasicType(context,
+                        mlir::StringAttr::get(context, logTy.getMnemonic()),
+                        kindMapping.getLogicalBitsize(logTy.getFKind()),
+                        llvm::dwarf::DW_ATE_boolean);
+  } else {
+    // FIXME: These types are currently unhandled. We are generating a
+    // placeholder type to allow us to test supported bits.
+    return genPlaceholderType(context);
+  }
+}
+
+} // namespace fir
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
new file mode 100644
index 00000000000000..5a2bb201db47a3
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
@@ -0,0 +1,40 @@
+//===-- DebugTypeGenerator.h -- type conversion ------------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_DEBUGTYPEGENERATOR_H
+#define FORTRAN_OPTIMIZER_TRANSFORMS_DEBUGTYPEGENERATOR_H
+
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "llvm/Support/Debug.h"
+
+namespace fir {
+
+/// This converts FIR/mlir type to DITypeAttr.
+class DebugTypeGenerator {
+public:
+  DebugTypeGenerator(mlir::ModuleOp module);
+
+  mlir::LLVM::DITypeAttr convertType(mlir::Type Ty,
+                                     mlir::LLVM::DIFileAttr fileAttr,
+                                     mlir::LLVM::DIScopeAttr scope,
+                                     mlir::Location loc);
+
+private:
+  mlir::ModuleOp module;
+  KindMapping kindMapping;
+};
+
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_TRANSFORMS_DEBUGTYPEGENERATOR_H
diff --git a/flang/test/Transforms/debug-fn-info.f90 b/flang/test/Transforms/debug-fn-info.f90
new file mode 100644
index 00000000000000..c1a817312c959c
--- /dev/null
+++ b/flang/test/Transforms/debug-fn-info.f90
@@ -0,0 +1,43 @@
+! RUN: %flang_fc1 -emit-fir -debug-info-kind=standalone -mmlir --mlir-print-debuginfo %s -o - | fir-opt --add-debug-info --mlir-print-debuginfo  | FileCheck %s
+
+
+! CHECK-DAG: #[[INT8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 64, encoding = DW_ATE_signed>
+! CHECK-DAG: #[[INT4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
+! CHECK-DAG: #[[REAL8:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 64, encoding = DW_ATE_float>
+! CHECK-DAG: #[[LOG1:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical", sizeInBits = 8, encoding = DW_ATE_boolean>
+! CHECK-DAG: #[[REAL4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
+! CHECK-DAG: #[[LOG4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical", sizeInBits = 32, encoding = DW_ATE_boolean>
+! CHECK: #[[TY1:.*]] = #llvm.di_subroutine_type<callingConvention = DW_CC_normal, types = #[[INT8]], #[[INT4]], #[[REAL8]], #[[LOG1]]>
+! CHECK: #[[TY2:.*]] = #llvm.di_subroutine_type<callingConvention = DW_CC_normal, types = #[[INT4]], #[[INT8]], #[[REAL4]], #[[LOG4]]>
+
+program mn
+  integer(kind=4) :: i4
+  integer(kind=8) :: i8
+  real(kind=4) :: r4
+  real(kind=8) :: r8
+  logical(kind=1) :: l1
+  logical(kind=4) :: l4
+  i8 = fn1(i4, r8, l1)
+  i4 = fn2(i8, r4, l4)
+contains
+  ! CHECK: #di_subprogram1 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "fn1", linkageName = "_QFPfn1", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY1]]>
+  function fn1(a, b, c) result (res)
+    implicit none
+    integer(kind=4), intent(in) :: a
+    real(kind=8), intent(in) :: b
+    logical(kind=1), intent(in) :: c
+    integer(kind=8) :: res
+    res = a + b
+  end function
+
+! CHECK: #di_subprogram2 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "fn2", linkageName = "_QFPfn2", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY2]]>
+  function fn2(a, b, c) result (res)
+    implicit none
+    integer(kind=8), intent(in) :: a
+    real(kind=4), intent(in) :: b
+    logical(kind=4), intent(in) :: c
+    integer(kind=4) :: res
+    res = a + b
+  end function
+end program
+
diff --git a/flang/test/Transforms/debug-line-table-inc-file.fir b/flang/test/Transforms/debug-line-table-inc-file.fir
index dc75482d4f8a7f..d7f60a1a86dbf0 100644
--- a/flang/test/Transforms/debug-line-table-inc-file.fir
+++ b/flang/test/Transforms/debug-line-table-inc-file.fir
@@ -31,7 +31,7 @@ module attributes {} {
 // CHECK: #[[LOC_INC_FILE:.*]] = loc("{{.*}}inc.f90":1:1)
 // CHECK: #[[LOC_FILE:.*]] = loc("{{.*}}simple.f90":3:1)
 // CHECK: #[[DI_CU:.*]] = #llvm.di_compile_unit<id = distinct[{{.*}}]<>, sourceLanguage = DW_LANG_Fortran95, file = #[[DI_FILE]], producer = "flang{{.*}}", isOptimized = false, emissionKind = LineTablesOnly>
-// CHECK: #[[DI_SP_INC:.*]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #[[DI_CU]], scope = #[[DI_FILE]], name = "_QPsinc", linkageName = "_QPsinc", file = #[[DI_INC_FILE]], {{.*}}>
+// CHECK: #[[DI_SP_INC:.*]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #[[DI_CU]], scope = #[[DI_FILE]], name = "sinc", linkageName = "_QPsinc", file = #[[DI_INC_FILE]], {{.*}}>
 // CHECK: #[[DI_SP:.*]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #[[DI_CU]], scope = #[[DI_FILE]], name = "_QQmain", linkageName = "_QQmain", file = #[[DI_FILE]], {{.*}}>
 // CHECK: #[[FUSED_LOC_INC_FILE]] = loc(fused<#[[DI_SP_INC]]>[#[[LOC_INC_FILE]]])
 // CHECK: #[[FUSED_LOC_FILE]] = loc(fused<#[[DI_SP]]>[#[[LOC_FILE]]])

From 09e7d86b99c6e6d4f4a3296647d1b7d803c5bac5 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Date: Tue, 30 Apr 2024 08:48:24 +0100
Subject: [PATCH 36/65] [RemoveDIs] Fix findDbgValues to return dbg_assign
 records too (#90471)

In the debug intrinsic class heirachy, a dbg.assign is a (inherits from)
dbg.value, so `findDbgValues` returns dbg.values and dbg.assigns (by
design). That hierarchy doesn't exist for DbgRecords - fix findDbgValues
to return dbg_assign records as well as dbg_values and add unittest.
---
 llvm/lib/IR/DebugInfo.cpp                     | 13 ++--
 llvm/unittests/Transforms/Utils/LocalTest.cpp | 61 +++++++++++++++++++
 2 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 4206162d176823..7976904b1fe9d6 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -80,8 +80,7 @@ TinyPtrVector<DbgVariableRecord *> llvm::findDVRDeclares(Value *V) {
   return Declares;
 }
 
-template <typename IntrinsicT, DbgVariableRecord::LocationType Type =
-                                   DbgVariableRecord::LocationType::Any>
+template <typename IntrinsicT, bool DbgAssignAndValuesOnly>
 static void
 findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V,
                   SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords) {
@@ -114,8 +113,7 @@ findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V,
     // Get DbgVariableRecords that use this as a single value.
     if (LocalAsMetadata *L = dyn_cast<LocalAsMetadata>(MD)) {
       for (DbgVariableRecord *DVR : L->getAllDbgVariableRecordUsers()) {
-        if (Type == DbgVariableRecord::LocationType::Any ||
-            DVR->getType() == Type)
+        if (!DbgAssignAndValuesOnly || DVR->isDbgValue() || DVR->isDbgAssign())
           if (EncounteredDbgVariableRecords.insert(DVR).second)
             DbgVariableRecords->push_back(DVR);
       }
@@ -130,8 +128,7 @@ findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V,
         continue;
       DIArgList *DI = cast<DIArgList>(AL);
       for (DbgVariableRecord *DVR : DI->getAllDbgVariableRecordUsers())
-        if (Type == DbgVariableRecord::LocationType::Any ||
-            DVR->getType() == Type)
+        if (!DbgAssignAndValuesOnly || DVR->isDbgValue() || DVR->isDbgAssign())
           if (EncounteredDbgVariableRecords.insert(DVR).second)
             DbgVariableRecords->push_back(DVR);
     }
@@ -141,14 +138,14 @@ findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V,
 void llvm::findDbgValues(
     SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V,
     SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords) {
-  findDbgIntrinsics<DbgValueInst, DbgVariableRecord::LocationType::Value>(
+  findDbgIntrinsics<DbgValueInst, /*DbgAssignAndValuesOnly=*/true>(
       DbgValues, V, DbgVariableRecords);
 }
 
 void llvm::findDbgUsers(
     SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers, Value *V,
     SmallVectorImpl<DbgVariableRecord *> *DbgVariableRecords) {
-  findDbgIntrinsics<DbgVariableIntrinsic, DbgVariableRecord::LocationType::Any>(
+  findDbgIntrinsics<DbgVariableIntrinsic, /*DbgAssignAndValuesOnly=*/false>(
       DbgUsers, V, DbgVariableRecords);
 }
 
diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp
index d7d0ea2c6a6e79..a0119ed5159d5a 100644
--- a/llvm/unittests/Transforms/Utils/LocalTest.cpp
+++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp
@@ -732,6 +732,67 @@ TEST(Local, FindDbgUsers) {
   EXPECT_EQ(Vals.size(), 1u);
 }
 
+TEST(Local, FindDbgRecords) {
+  // DbgRecord copy of the FindDbgUsers test above.
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M = parseIR(Ctx,
+                                      R"(
+  define dso_local void @fun(ptr %a) #0 !dbg !11 {
+  entry:
+    call void @llvm.dbg.assign(metadata ptr %a, metadata !16, metadata !DIExpression(), metadata !15, metadata ptr %a, metadata !DIExpression()), !dbg !19
+    ret void
+  }
+
+  !llvm.dbg.cu = !{!0}
+  !llvm.module.flags = !{!2, !3, !9}
+  !llvm.ident = !{!10}
+
+  !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 17.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+  !1 = !DIFile(filename: "test.cpp", directory: "/")
+  !2 = !{i32 7, !"Dwarf Version", i32 5}
+  !3 = !{i32 2, !"Debug Info Version", i32 3}
+  !4 = !{i32 1, !"wchar_size", i32 4}
+  !9 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+  !10 = !{!"clang version 17.0.0"}
+  !11 = distinct !DISubprogram(name: "fun", linkageName: "fun", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !14)
+  !12 = !DISubroutineType(types: !13)
+  !13 = !{null}
+  !14 = !{}
+  !15 = distinct !DIAssignID()
+  !16 = !DILocalVariable(name: "x", scope: !11, file: !1, line: 2, type: !17)
+  !17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64)
+  !18 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+  !19 = !DILocation(line: 0, scope: !11)
+  )");
+
+  bool BrokenDebugInfo = true;
+  verifyModule(*M, &errs(), &BrokenDebugInfo);
+  ASSERT_FALSE(BrokenDebugInfo);
+  bool NewDbgInfoFormat = UseNewDbgInfoFormat;
+  UseNewDbgInfoFormat = true;
+  M->convertToNewDbgValues();
+
+  Function &Fun = *cast<Function>(M->getNamedValue("fun"));
+  Value *Arg = Fun.getArg(0);
+
+  SmallVector<DbgVariableIntrinsic *> Users;
+  SmallVector<DbgVariableRecord *> Records;
+  // Arg (%a) is used twice by a single dbg_assign. Check findDbgUsers returns
+  // only 1 pointer to it rather than 2.
+  findDbgUsers(Users, Arg, &Records);
+  EXPECT_EQ(Users.size(), 0u);
+  EXPECT_EQ(Records.size(), 1u);
+
+  SmallVector<DbgValueInst *> Vals;
+  Records.clear();
+  // Arg (%a) is used twice by a single dbg_assign. Check findDbgValues returns
+  // only 1 pointer to it rather than 2.
+  findDbgValues(Vals, Arg, &Records);
+  EXPECT_EQ(Vals.size(), 0u);
+  EXPECT_EQ(Records.size(), 1u);
+  UseNewDbgInfoFormat = NewDbgInfoFormat;
+}
+
 TEST(Local, ReplaceAllDbgUsesWith) {
   using namespace llvm::dwarf;
 

From 853344d3ae8bb655b2d15175880bd3f65ab66434 Mon Sep 17 00:00:00 2001
From: Kristof Beyls <kristof.beyls@arm.com>
Date: Tue, 30 Apr 2024 09:49:57 +0200
Subject: [PATCH 37/65] [docs] Document which online sync-ups are no longer
 happening (#89361)

Some of the online sync-ups on our Getting Involved page seem to no
longer be happening. Document them as no longer happening, so that
people don't get confused when dialing in to one of these.
---
 llvm/docs/GettingInvolved.rst | 60 ++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst
index 93be3bd1d8545e..a45d73a9a3d42b 100644
--- a/llvm/docs/GettingInvolved.rst
+++ b/llvm/docs/GettingInvolved.rst
@@ -158,11 +158,6 @@ what to add to your calendar invite.
      - `ics <https://calendar.google.com/calendar/ical/lowrisc.org_0n5pkesfjcnp0bh5hps1p0bd80%40group.calendar.google.com/public/basic.ics>`__
        `gcal <https://calendar.google.com/calendar/b/1?cid=bG93cmlzYy5vcmdfMG41cGtlc2ZqY25wMGJoNWhwczFwMGJkODBAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ>`__
      - `Minutes/docs <https://docs.google.com/document/d/1G3ocHm2zE6AYTS2N3_3w2UxFnSEyKkcF57siLWe-NVs>`__
-   * - Scalable Vectors and Arm SVE
-     - Monthly, every 3rd Tuesday
-     - `ics <https://calendar.google.com/calendar/ical/bjms39pe6k6bo5egtsp7don414%40group.calendar.google.com/public/basic.ics>`__
-       `gcal <https://calendar.google.com/calendar/u/0/embed?src=bjms39pe6k6bo5egtsp7don414@group.calendar.google.com>`__
-     - `Minutes/docs <https://docs.google.com/document/d/1UPH2Hzou5RgGT8XfO39OmVXKEibWPfdYLELSaHr3xzo/edit>`__
    * - ML Guided Compiler Optimizations
      - Monthly
      -
@@ -176,10 +171,6 @@ what to add to your calendar invite.
      - Weekly, on Wednesday
      -
      - `Minutes/docs <https://docs.google.com/document/d/1fOSRdyZR2w75D87yU2Ma9h2-_lEPL4NxvhJGJd-s5pk/edit#heading=h.mulvhjtr8dk9>`__
-   * - `MLIR <https://mlir.llvm.org>`__ design meetings
-     - Weekly, on Thursdays
-     -
-     - `Minutes/docs <https://docs.google.com/document/d/1y_9f1AbfgcoVdJh4_aM6-BaSHvrHl8zuA5G4jv_94K8/edit#heading=h.cite1kolful9>`__
    * - flang
      - Multiple meeting series, `documented here <https://github.com/llvm/llvm-project/blob/main/flang/docs/GettingInvolved.md#calls>`__
      -
@@ -192,19 +183,10 @@ what to add to your calendar invite.
      - Every 4 weeks on Tuesdays
      - `ics <http://lists.llvm.org/pipermail/llvm-dev/attachments/20201103/a3499a67/attachment-0001.ics>`__
      - `Minutes/docs <https://docs.google.com/document/d/17U-WvX8qyKc3S36YUKr3xfF-GHunWyYowXbxEdpHscw>`__
-   * - Vector Predication
-     - Every 2 weeks on Tuesdays, 3pm UTC
-     -
-     - `Minutes/docs <https://docs.google.com/document/d/1q26ToudQjnqN5x31zk8zgq_s0lem1-BF8pQmciLa4k8/edit?usp=sharing>`__
    * - LLVM Pointer Authentication
      - Every month on Mondays
      - `ics <https://calendar.google.com/calendar/ical/fr1qtmrmt2s9odufjvurkb6j70%40group.calendar.google.com/public/basic.ics>`__
      - `Minutes/docs <https://discourse.llvm.org/t/llvm-pointer-authentication-sync-ups/62661>`__
-   * - MemorySSA in LLVM
-     - Every 8 weeks on Mondays
-     - `ics <https://calendar.google.com/calendar/ical/c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com/public/basic.ics>`__
-       `gcal <https://calendar.google.com/calendar/embed?src=c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com>`__
-     - `Minutes/docs <https://docs.google.com/document/d/1-uEEZfmRdPThZlctOq9eXlmUaSSAAi8oKxhrPY_lpjk/edit#>`__
    * - LLVM Embedded Toolchains
      - Every 4 weeks on Thursdays
      - `ics <https://drive.google.com/file/d/1uNa-PFYkhAfT83kR2Nc4Fi706TAQFBEL/view?usp=sharing>`__
@@ -222,16 +204,50 @@ what to add to your calendar invite.
      - Every 2 weeks on Mondays
      - `gcal <https://calendar.google.com/calendar/u/0?cid=c3ljbC5sbHZtLndnQGdtYWlsLmNvbQ>`__
      - `Meeting details/agenda <https://docs.google.com/document/d/1ivYDSn_5ChTeiZ7TiO64WC_jYJnGwAUiT9Ngi9cAdFU/edit?usp=sharing>`__
-   * - GlobalISel
-     - Every 2nd Tuesday of the month
-     - `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
-     - `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__
    * - Floating Point Working Group
      - Every 3rd Wednesday of the month
      - `ics <https://calendar.google.com/calendar/ical/02582507bac79d186900712566ec3fc69b33ac24d7de0a8c76c7b19976f190c0%40group.calendar.google.com/private-6e35506dbfe13812e92e9aa8cd5d761d/basic.ics>`__
        `gcal <https://calendar.google.com/calendar/u/0?cid=MDI1ODI1MDdiYWM3OWQxODY5MDA3MTI1NjZlYzNmYzY5YjMzYWMyNGQ3ZGUwYThjNzZjN2IxOTk3NmYxOTBjMEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
      - `Meeting details/agenda: <https://docs.google.com/document/d/1QcmUlWftPlBi-Wz6b6PipqJfvjpJ-OuRMRnN9Dm2t0c>`__
 
+Past online sync-ups
+^^^^^^^^^^^^^^^^^^^^
+
+Some online sync-ups are no longer happening. We keep pointing to them here to
+keep track of the meeting notes and in case anyone would want to revive them in
+the future.
+
+.. list-table:: LLVM no-longer-happening sync-up calls
+   :widths: 25 25 25 25
+   :header-rows: 1
+
+   * - Topic
+     - Frequency
+     - Calendar link
+     - Minutes/docs link
+   * - Scalable Vectors and Arm SVE
+     - Monthly, every 3rd Tuesday
+     - `ics <https://calendar.google.com/calendar/ical/bjms39pe6k6bo5egtsp7don414%40group.calendar.google.com/public/basic.ics>`__
+       `gcal <https://calendar.google.com/calendar/u/0/embed?src=bjms39pe6k6bo5egtsp7don414@group.calendar.google.com>`__
+     - `Minutes/docs <https://docs.google.com/document/d/1UPH2Hzou5RgGT8XfO39OmVXKEibWPfdYLELSaHr3xzo/edit>`__
+   * - MemorySSA in LLVM
+     - Every 8 weeks on Mondays
+     - `ics <https://calendar.google.com/calendar/ical/c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com/public/basic.ics>`__
+       `gcal <https://calendar.google.com/calendar/embed?src=c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com>`__
+     - `Minutes/docs <https://docs.google.com/document/d/1-uEEZfmRdPThZlctOq9eXlmUaSSAAi8oKxhrPY_lpjk/edit#>`__
+   * - GlobalISel
+     - Every 2nd Tuesday of the month
+     - `gcal <https://calendar.google.com/calendar/u/0?cid=ZDcyMjc0ZjZiZjNhMzFlYmE3NTNkMWM2MGM2NjM5ZWU3ZDE2MjM4MGFlZDc2ZjViY2UyYzMwNzVhZjk4MzQ4ZEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`__
+     - `Meeting details/agenda <https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing>`__
+   * - Vector Predication
+     - Every 2 weeks on Tuesdays, 3pm UTC
+     -
+     - `Minutes/docs <https://docs.google.com/document/d/1q26ToudQjnqN5x31zk8zgq_s0lem1-BF8pQmciLa4k8/edit?usp=sharing>`__
+   * - `MLIR <https://mlir.llvm.org>`__ design meetings
+     - Weekly, on Thursdays
+     -
+     - `Minutes/docs <https://docs.google.com/document/d/1y_9f1AbfgcoVdJh4_aM6-BaSHvrHl8zuA5G4jv_94K8/edit#heading=h.cite1kolful9>`__
+
 .. _office-hours:
 
 Office hours

From 6c3110464bac3600685af9650269b0b2b8669d34 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 15:57:58 +0800
Subject: [PATCH 38/65] [Modules] No transitive source location change (#86912)

This is part of "no transitive change" patch series, "no transitive
source location change". I talked this with @Bigcheese in the tokyo's
WG21 meeting.

The idea comes from @jyknight posted on LLVM discourse. That for:

```
// A.cppm
export module A;
...

// B.cppm
export module B;
import A;
...

//--- C.cppm
export module C;
import C;
```

Almost every time A.cppm changes, we need to recompile `B`. Due to we
think the source location is significant to the semantics. But it may be
good if we can avoid recompiling `C` if the change from `A` wouldn't
change the BMI of B.

# Motivation Example

This patch only cares source locations. So let's focus on source
location's example. We can see the full example from the attached test.

```
//--- A.cppm
export module A;
export template <class T>
struct C {
    T func() {
        return T(43);
    }
};
export int funcA() {
    return 43;
}

//--- A.v1.cppm
export module A;

export template <class T>
struct C {
    T func() {
        return T(43);
    }
};
export int funcA() {
    return 43;
}

//--- B.cppm
export module B;
import A;

export int funcB() {
    return funcA();
}

//--- C.cppm
export module C;
import A;
export void testD() {
    C<int> c;
    c.func();
}
```

Here the only difference between `A.cppm` and `A.v1.cppm` is that
`A.v1.cppm` has an additional blank line. Then the test shows that two
BMI of `B.cppm`, one specified `-fmodule-file=A=A.pcm` and the other
specified `-fmodule-file=A=A.v1.pcm`, should have the bit-wise same
contents.

However, it is a different story for C, since C instantiates templates
from A, and the instantiation records the source information from module
A, which is different from `A` and `A.v1`, so it is expected that the
BMI `C.pcm` and `C.v1.pcm` can and should differ.

# Internal perspective of status quo

To fully understand the patch, we need to understand how we encodes
source locations and how we serialize and deserialize them.

For source locations, we encoded them as:

```
|
|
| _____ base offset of an imported module
|
|
|
|_____ base offset of another imported module
|
|
|
|
| ___ 0
```

As the diagram shows, we encode the local (unloaded) source location
from 0 to higher bits. And we allocate the space for source locations
from the loaded modules from high bits to 0. Then the source locations
from the loaded modules will be mapped to our source location space
according to the allocated offset.

For example, for,

```
// a.cppm
export module a;
...

// b.cppm
export module b;
import a;
...
```

Assuming the offset of a source location (let's name the location as
`S`) in a.cppm is 45 and we will record the value `45` into the BMI
`a.pcm`. Then in b.cppm, when we import a, the source manager will
allocate a space for module 'a' (according to the recorded number of
source locations) as the base offset of module 'a' in the current source
location spaces. Let's assume the allocated base offset as 90 in this
example. Then when we want to get the location in the current source
location space for `S`, we can get it simply by adding `45` to `90` to
`135`. Finally we can get the source location for `S` in module B as
`135`.

And when we want to write module `b`, we would also write the source
location of `S` as `135` directly in the BMI. And to clarify the
location `S` comes from module `a`, we also need to record the base
offset of module `a`, 90 in the BMI of `b`.

Then the problem comes. Since the base offset of module 'a' is computed
by the number source locations in module 'a'. In module 'b', the
recorded base offset of module 'a' will change every time the number of
source locations in module 'a' increase or decrease. In other words, the
contents of BMI of B will change every time the number of locations in
module 'a' changes. This is pretty sensitive. Almost every change will
change the number of locations. So this is the problem this patch want
to solve.

Let's continue with the existing design to understand what's going on.
Another interesting case is:

```
// c.cppm
export module c;
import whatever;
import a;
import b;
...
```

In `c.cppm`, when we import `a`, we still need to allocate a base
location offset for it, let's say the value becomes to `200` somehow.
Then when we reach the location `S` recorded in module `b`, we need to
translate it into the current source location space. The solution is
quite simple, we can get it by `135 + (200 - 90) = 245`. In another
word, the offset of a source location in current module can be computed
as `Recorded Offset + Base Offset of the its module file - Recorded Base
Offset`.

Then we're almost done about how we handle the offset of source
locations in serializers.

# The high level design of current patch

From the abstract level, what we want to do is to remove the hardcoded
base offset of imported modules and remain the ability to calculate the
source location in a new module unit. To achieve this, we need to be
able to find the module file owning a source location from the encoding
of the source location.

So in this patch, for each source location, we will store the local
offset of the location and the module file index. For the above example,
in `b.pcm`, the source location of `S` will be recorded as `135`
directly. And in the new design, the source location of `S` will be
recorded as `<1, 45>`. Here `1` stands for the module file index of `a`
in module `b`. And `45` means the offset of `S` to the base offset of
module `a`.

So the trade-off here is that, to make the BMI more independent, we need
to record more abstract information. And I feel it is worthy. The
recompilation problem of modules is really annoying and there are still
people complaining this. But if we can make this (including stopping
other changes transitively), I think this may be a killer feature for
modules. And from @Bigcheese , this should be helpful for clang explicit
modules too.

And the benchmarking side, I tested this patch against
https://github.com/alibaba/async_simple/tree/CXX20Modules. No
significant change on compilation time. The size of .pcm files becomes
to 204M from 200M. I think the trade-off is pretty fair.

# Some low level details

I didn't use another slot to record the module file index. I tried to
use the higher 32 bits of the existing source location encodings to
store that information. This design may be safe. Since we use `unsigned`
to store source locations but we use uint64_t in serialization. And
generally `unsigned` is 32 bit width in most platforms. So it might not
be a safe problem. Since all the bits we used to store the module file
index is not used before. So the new encodings may be:

```
   |-----------------------|-----------------------|
   |           A           |         B         | C |

  * A: 32 bit. The index of the module file in the module manager + 1. The +1
          here is necessary since we wish 0 stands for the current module file.
  * B: 31 bit. The offset of the source location to the module file containing it.
  * C: The macro bit. We rotate it to the lowest bit so that we can save some
          space in case the index of the module file is 0.
```

(The B and C is the existing raw encoding for source locations)

Another reason to reuse the same slot of the source location is to
reduce the impact of the patch. Since there are a lot of places assuming
we can store and get a source location from a slot. And if I tried to
add another slot, a lot of codes breaks. I don't feel it is worhty.

Another impact of this decision is that, the existing small
optimizations for encoding source location may be invalided. The key of
the optimization is that we can turn large values into small values then
we can use VBR6 format to reduce the size. But if we decided to put the
module file index into the higher bits, then maybe it simply doesn't
work. An example may be the `SourceLocationSequence` optimization.

This will only affect the size of on-disk .pcm files. I don't expect
this impact the speed and memory use of compilations. And seeing my
small experiments above, I feel this trade off is worthy.

# Correctness

The mental model for handling source location offsets is not so complex
and I believe we can solve it by adding module file index to each stored
source location.

For the practical side, since the source location is pretty sensitive,
and the patch can pass all the in-tree tests and a small scale projects,
I feel it should be correct.

# Future Plans

I'll continue to work on no transitive decl change and no transitive
identifier change (if matters) to achieve the goal to stop the
propagation of unnecessary changes. But all of this depends on this
patch. Since, clearly, the source locations are the most sensitive
thing.

---

The release nots and documentation will be added seperately.
---
 clang/include/clang/Basic/SourceLocation.h    |  1 +
 .../include/clang/Serialization/ASTBitCodes.h | 56 +++++-------
 clang/include/clang/Serialization/ASTReader.h | 48 ++++++----
 clang/include/clang/Serialization/ASTWriter.h |  4 +
 .../include/clang/Serialization/ModuleFile.h  | 14 ++-
 .../Serialization/SourceLocationEncoding.h    | 91 +++++++++++++------
 clang/lib/Frontend/ASTUnit.cpp                |  2 -
 clang/lib/Serialization/ASTReader.cpp         | 57 ++++--------
 clang/lib/Serialization/ASTReaderDecl.cpp     |  2 +-
 clang/lib/Serialization/ASTWriter.cpp         | 41 +++++++--
 clang/lib/Serialization/ASTWriterDecl.cpp     |  8 +-
 clang/lib/Serialization/ModuleFile.cpp        |  1 -
 .../no-transitive-source-location-change.cppm | 69 ++++++++++++++
 clang/test/Modules/pr61067.cppm               | 25 -----
 .../SourceLocationEncodingTest.cpp            | 12 ++-
 15 files changed, 269 insertions(+), 162 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-source-location-change.cppm

diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index 00b1e0fa855b7a..7a0f5ba8d1270b 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -90,6 +90,7 @@ class SourceLocation {
   friend class ASTWriter;
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait<SourceLocation, void>;
+  friend class SourceLocationEncoding;
 
 public:
   using UIntTy = uint32_t;
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index a8df5a0bda0850..93e971d7e142c3 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -23,6 +23,7 @@
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
 #include <cassert>
@@ -167,45 +168,38 @@ const unsigned int NUM_PREDEF_SUBMODULE_IDS = 1;
 
 /// Source range/offset of a preprocessed entity.
 struct PPEntityOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
 
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
   /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase.
   uint32_t BitOffset;
 
-  PPEntityOffset(SourceRange R, uint32_t BitOffset)
-      : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()),
-        BitOffset(BitOffset) {}
-
-  SourceLocation getBegin() const {
-    return SourceLocation::getFromRawEncoding(Begin);
-  }
+  PPEntityOffset(RawLocEncoding Begin, RawLocEncoding End, uint32_t BitOffset)
+      : Begin(Begin), End(End), BitOffset(BitOffset) {}
 
-  SourceLocation getEnd() const {
-    return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Source range of a skipped preprocessor region
 struct PPSkippedRange {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location of beginning of range.
-  SourceLocation::UIntTy Begin;
+  RawLocEncoding Begin;
   /// Raw source location of end of range.
-  SourceLocation::UIntTy End;
+  RawLocEncoding End;
 
-  PPSkippedRange(SourceRange R)
-      : Begin(R.getBegin().getRawEncoding()), End(R.getEnd().getRawEncoding()) {
-  }
+  PPSkippedRange(RawLocEncoding Begin, RawLocEncoding End)
+      : Begin(Begin), End(End) {}
 
-  SourceLocation getBegin() const {
-    return SourceLocation::getFromRawEncoding(Begin);
-  }
-  SourceLocation getEnd() const {
-    return SourceLocation::getFromRawEncoding(End);
-  }
+  RawLocEncoding getBegin() const { return Begin; }
+  RawLocEncoding getEnd() const { return End; }
 };
 
 /// Offset in the AST file. Use splitted 64-bit integer into low/high
@@ -231,8 +225,10 @@ struct UnderalignedInt64 {
 
 /// Source location and bit offset of a declaration.
 struct DeclOffset {
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Raw source location.
-  SourceLocation::UIntTy Loc = 0;
+  RawLocEncoding RawLoc = 0;
 
   /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep
   /// structure alignment 32-bit and avoid padding gap because undefined
@@ -240,17 +236,15 @@ struct DeclOffset {
   UnderalignedInt64 BitOffset;
 
   DeclOffset() = default;
-  DeclOffset(SourceLocation Loc, uint64_t BitOffset,
-             uint64_t DeclTypesBlockStartOffset) {
-    setLocation(Loc);
+  DeclOffset(RawLocEncoding RawLoc, uint64_t BitOffset,
+             uint64_t DeclTypesBlockStartOffset)
+      : RawLoc(RawLoc) {
     setBitOffset(BitOffset, DeclTypesBlockStartOffset);
   }
 
-  void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
+  void setRawLoc(RawLocEncoding Loc) { RawLoc = Loc; }
 
-  SourceLocation getLocation() const {
-    return SourceLocation::getFromRawEncoding(Loc);
-  }
+  RawLocEncoding getRawLoc() const { return RawLoc; }
 
   void setBitOffset(uint64_t Offset, const uint64_t DeclTypesBlockStartOffset) {
     BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset);
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 64f1ebc117b327..e24fa121528f3f 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1771,6 +1771,7 @@ class ASTReader
 
   /// Retrieve the module manager.
   ModuleManager &getModuleManager() { return ModuleMgr; }
+  const ModuleManager &getModuleManager() const { return ModuleMgr; }
 
   /// Retrieve the preprocessor.
   Preprocessor &getPreprocessor() const { return PP; }
@@ -2177,8 +2178,8 @@ class ASTReader
 
   /// Retrieve the global submodule ID given a module and its local ID
   /// number.
-  serialization::SubmoduleID
-  getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID);
+  serialization::SubmoduleID getGlobalSubmoduleID(ModuleFile &M,
+                                                  unsigned LocalID) const;
 
   /// Retrieve the submodule that corresponds to a global submodule ID.
   ///
@@ -2191,7 +2192,7 @@ class ASTReader
 
   /// Retrieve the module file with a given local ID within the specified
   /// ModuleFile.
-  ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID);
+  ModuleFile *getLocalModuleFile(ModuleFile &M, unsigned ID) const;
 
   /// Get an ID for the given module file.
   unsigned getModuleFileID(ModuleFile *M);
@@ -2227,33 +2228,46 @@ class ASTReader
     return Sema::AlignPackInfo::getFromRawEncoding(Raw);
   }
 
+  using RawLocEncoding = SourceLocationEncoding::RawLocEncoding;
+
   /// Read a source location from raw form and return it in its
   /// originating module file's source location space.
-  SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw,
-                                                LocSeq *Seq = nullptr) const {
+  std::pair<SourceLocation, unsigned>
+  ReadUntranslatedSourceLocation(RawLocEncoding Raw,
+                                 LocSeq *Seq = nullptr) const {
     return SourceLocationEncoding::decode(Raw, Seq);
   }
 
   /// Read a source location from raw form.
-  SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
-                                    SourceLocation::UIntTy Raw,
+  SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
                                     LocSeq *Seq = nullptr) const {
-    SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq);
-    return TranslateSourceLocation(ModuleFile, Loc);
+    if (!MF.ModuleOffsetMap.empty())
+      ReadModuleOffsetMap(MF);
+
+    auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
+    ModuleFile *OwningModuleFile =
+        ModuleFileIndex == 0 ? &MF : MF.DependentModules[ModuleFileIndex - 1];
+
+    assert(!SourceMgr.isLoadedSourceLocation(Loc) &&
+           "Run out source location space");
+
+    return TranslateSourceLocation(*OwningModuleFile, Loc);
   }
 
   /// Translate a source location from another module file's source
   /// location space into ours.
   SourceLocation TranslateSourceLocation(ModuleFile &ModuleFile,
                                          SourceLocation Loc) const {
-    if (!ModuleFile.ModuleOffsetMap.empty())
-      ReadModuleOffsetMap(ModuleFile);
-    assert(ModuleFile.SLocRemap.find(Loc.getOffset()) !=
-               ModuleFile.SLocRemap.end() &&
-           "Cannot find offset to remap.");
-    SourceLocation::IntTy Remap =
-        ModuleFile.SLocRemap.find(Loc.getOffset())->second;
-    return Loc.getLocWithOffset(Remap);
+    if (Loc.isInvalid())
+      return Loc;
+
+    // FIXME: TranslateSourceLocation is not re-enterable. It is problematic
+    // to call TranslateSourceLocation on a translated source location.
+    // We either need a method to know whether or not a source location is
+    // translated or refactor the code to make it clear that
+    // TranslateSourceLocation won't be called with translated source location.
+
+    return Loc.getLocWithOffset(ModuleFile.SLocEntryBaseOffset - 2);
   }
 
   /// Read a source location.
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 6c45b7348b8552..428bf6a5a791b3 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -666,6 +666,10 @@ class ASTWriter : public ASTDeserializationListener,
   void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
                          LocSeq *Seq = nullptr);
 
+  /// Return the raw encodings for source locations.
+  SourceLocationEncoding::RawLocEncoding
+  getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
+
   /// Emit a source range.
   void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
                       LocSeq *Seq = nullptr);
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 25f644e76edb1a..ba8df0ed259427 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -295,10 +295,6 @@ class ModuleFile {
   /// AST file.
   const uint32_t *SLocEntryOffsets = nullptr;
 
-  /// Remapping table for source locations in this module.
-  ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2>
-      SLocRemap;
-
   // === Identifiers ===
 
   /// The number of identifiers in this AST file.
@@ -512,9 +508,17 @@ class ModuleFile {
   /// List of modules which depend on this module
   llvm::SetVector<ModuleFile *> ImportedBy;
 
-  /// List of modules which this module depends on
+  /// List of modules which this module directly imported
   llvm::SetVector<ModuleFile *> Imports;
 
+  /// List of modules which this modules dependent on. Different
+  /// from `Imports`, this includes indirectly imported modules too.
+  /// The order of DependentModules is significant. It should keep
+  /// the same order with that module file manager when we write
+  /// the current module file. The value of the member will be initialized
+  /// in `ASTReader::ReadModuleOffsetMap`.
+  llvm::SmallVector<ModuleFile *, 16> DependentModules;
+
   /// Determine whether this module was directly imported at
   /// any point during translation.
   bool isDirectlyImported() const { return DirectlyImported; }
diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h
index 9bb0dbe2e4d6f6..33ca1728fa4797 100644
--- a/clang/include/clang/Serialization/SourceLocationEncoding.h
+++ b/clang/include/clang/Serialization/SourceLocationEncoding.h
@@ -6,28 +6,33 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Source locations are stored pervasively in the AST, making up a third of
-// the size of typical serialized files. Storing them efficiently is important.
+// We wish to encode the SourceLocation from other module file not dependent
+// on the other module file. So that the source location changes from other
+// module file may not affect the contents of the current module file. Then the
+// users don't need to recompile the whole project due to a new line in a module
+// unit in the root of the dependency graph.
 //
-// We use integers optimized by VBR-encoding, because:
-//  - when abbreviations cannot be used, VBR6 encoding is our only choice
-//  - in the worst case a SourceLocation can be ~any 32-bit number, but in
-//    practice they are highly predictable
+// To achieve this, we need to encode the index of the module file into the
+// encoding of the source location. The encoding of the source location may be:
 //
-// We encode the integer so that likely values encode as small numbers that
-// turn into few VBR chunks:
-//  - the invalid sentinel location is a very common value: it encodes as 0
-//  - the "macro or not" bit is stored at the bottom of the integer
-//    (rather than at the top, as in memory), so macro locations can have
-//    small representations.
-//  - related locations (e.g. of a left and right paren pair) are usually
-//    similar, so when encoding a sequence of locations we store only
-//    differences between successive elements.
+//      |-----------------------|-----------------------|
+//      |          A            |         B         | C |
+//
+//  * A: 32 bit. The index of the module file in the module manager + 1. The +1
+//  here is necessary since we wish 0 stands for the current module file.
+//  * B: 31 bit. The offset of the source location to the module file containing
+//  it.
+//  * C: The macro bit. We rotate it to the lowest bit so that we can save some
+//  space in case the index of the module file is 0.
+//
+// Specially, if the index of the module file is 0, we allow to encode a
+// sequence of locations we store only differences between successive elements.
 //
 //===----------------------------------------------------------------------===//
 
-#include <climits>
 #include "clang/Basic/SourceLocation.h"
+#include "llvm/Support/MathExtras.h"
+#include <climits>
 
 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
@@ -52,9 +57,13 @@ class SourceLocationEncoding {
   friend SourceLocationSequence;
 
 public:
-  static uint64_t encode(SourceLocation Loc,
-                         SourceLocationSequence * = nullptr);
-  static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
+  using RawLocEncoding = uint64_t;
+
+  static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
+                               unsigned BaseModuleFileIndex,
+                               SourceLocationSequence * = nullptr);
+  static std::pair<SourceLocation, unsigned>
+  decode(RawLocEncoding, SourceLocationSequence * = nullptr);
 };
 
 /// Serialized encoding of a sequence of SourceLocations.
@@ -149,14 +158,44 @@ class SourceLocationSequence::State {
   operator SourceLocationSequence *() { return &Seq; }
 };
 
-inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
-                                               SourceLocationSequence *Seq) {
-  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+inline SourceLocationEncoding::RawLocEncoding
+SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
+                               unsigned BaseModuleFileIndex,
+                               SourceLocationSequence *Seq) {
+  // If the source location is a local source location, we can try to optimize
+  // the similar sequences to only record the differences.
+  if (!BaseOffset)
+    return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
+
+  if (Loc.isInvalid())
+    return 0;
+
+  // Otherwise, the higher bits are used to store the module file index,
+  // so it is meaningless to optimize the source locations into small
+  // integers. Let's try to always use the raw encodings.
+  assert(Loc.getOffset() >= BaseOffset);
+  Loc = Loc.getLocWithOffset(-BaseOffset);
+  RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
+
+  // 16 bits should be sufficient to store the module file index.
+  assert(BaseModuleFileIndex < (1 << 16));
+  Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
+  return Encoded;
 }
-inline SourceLocation
-SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
-  return Seq ? Seq->decode(Encoded)
-             : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
+inline std::pair<SourceLocation, unsigned>
+SourceLocationEncoding::decode(RawLocEncoding Encoded,
+                               SourceLocationSequence *Seq) {
+  unsigned ModuleFileIndex = Encoded >> 32;
+
+  if (!ModuleFileIndex)
+    return {Seq ? Seq->decode(Encoded)
+                : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
+            ModuleFileIndex};
+
+  Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
+  SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
+
+  return {Loc, ModuleFileIndex};
 }
 
 } // namespace clang
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 1b93588553a276..755aaddc0ad747 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -2374,8 +2374,6 @@ bool ASTUnit::serialize(raw_ostream &OS) {
   return serializeUnit(Writer, Buffer, getSema(), OS);
 }
 
-using SLocRemap = ContinuousRangeMap<unsigned, int, 2>;
-
 void ASTUnit::TranslateStoredDiagnostics(
                           FileManager &FileMgr,
                           SourceManager &SrcMgr,
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0ef57a3ea804ef..a670f2a2af41c7 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3030,8 +3030,10 @@ ASTReader::ReadControlBlock(ModuleFile &F,
         // The import location will be the local one for now; we will adjust
         // all import locations of module imports after the global source
         // location info are setup, in ReadAST.
-        SourceLocation ImportLoc =
+        auto [ImportLoc, ImportModuleFileIndex] =
             ReadUntranslatedSourceLocation(Record[Idx++]);
+        // The import location must belong to the current module file itself.
+        assert(ImportModuleFileIndex == 0);
         off_t StoredSize = !IsImportingStdCXXModule ? (off_t)Record[Idx++] : 0;
         time_t StoredModTime =
             !IsImportingStdCXXModule ? (time_t)Record[Idx++] : 0;
@@ -3653,13 +3655,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
           std::make_pair(SourceManager::MaxLoadedOffset - F.SLocEntryBaseOffset
                            - SLocSpaceSize,&F));
 
-      // Initialize the remapping table.
-      // Invalid stays invalid.
-      F.SLocRemap.insertOrReplace(std::make_pair(0U, 0));
-      // This module. Base was 2 when being compiled.
-      F.SLocRemap.insertOrReplace(std::make_pair(
-          2U, static_cast<SourceLocation::IntTy>(F.SLocEntryBaseOffset - 2)));
-
       TotalNumSLocEntries += F.LocalNumSLocEntries;
       break;
     }
@@ -4047,18 +4042,7 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
   const unsigned char *DataEnd = Data + F.ModuleOffsetMap.size();
   F.ModuleOffsetMap = StringRef();
 
-  // If we see this entry before SOURCE_LOCATION_OFFSETS, add placeholders.
-  if (F.SLocRemap.find(0) == F.SLocRemap.end()) {
-    F.SLocRemap.insert(std::make_pair(0U, 0));
-    F.SLocRemap.insert(std::make_pair(2U, 1));
-  }
-
-  // Continuous range maps we may be updating in our module.
-  using SLocRemapBuilder =
-      ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy,
-                         2>::Builder;
   using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder;
-  SLocRemapBuilder SLocRemap(F.SLocRemap);
   RemapBuilder IdentifierRemap(F.IdentifierRemap);
   RemapBuilder MacroRemap(F.MacroRemap);
   RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap);
@@ -4067,6 +4051,9 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
   RemapBuilder DeclRemap(F.DeclRemap);
   RemapBuilder TypeRemap(F.TypeRemap);
 
+  auto &ImportedModuleVector = F.DependentModules;
+  assert(ImportedModuleVector.empty());
+
   while (Data < DataEnd) {
     // FIXME: Looking up dependency modules by filename is horrible. Let's
     // start fixing this with prebuilt, explicit and implicit modules and see
@@ -4082,15 +4069,14 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
                           ? ModuleMgr.lookupByModuleName(Name)
                           : ModuleMgr.lookupByFileName(Name));
     if (!OM) {
-      std::string Msg =
-          "SourceLocation remap refers to unknown module, cannot find ";
+      std::string Msg = "refers to unknown module, cannot find ";
       Msg.append(std::string(Name));
       Error(Msg);
       return;
     }
 
-    SourceLocation::UIntTy SLocOffset =
-        endian::readNext<uint32_t, llvm::endianness::little>(Data);
+    ImportedModuleVector.push_back(OM);
+
     uint32_t IdentifierIDOffset =
         endian::readNext<uint32_t, llvm::endianness::little>(Data);
     uint32_t MacroIDOffset =
@@ -4114,13 +4100,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
                                     static_cast<int>(BaseOffset - Offset)));
     };
 
-    constexpr SourceLocation::UIntTy SLocNone =
-        std::numeric_limits<SourceLocation::UIntTy>::max();
-    if (SLocOffset != SLocNone)
-      SLocRemap.insert(std::make_pair(
-          SLocOffset, static_cast<SourceLocation::IntTy>(
-                          OM->SLocEntryBaseOffset - SLocOffset)));
-
     mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap);
     mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap);
     mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID,
@@ -6256,8 +6235,8 @@ SourceRange ASTReader::ReadSkippedRange(unsigned GlobalIndex) {
   unsigned LocalIndex = GlobalIndex - M->BasePreprocessedSkippedRangeID;
   assert(LocalIndex < M->NumPreprocessedSkippedRanges);
   PPSkippedRange RawRange = M->PreprocessedSkippedRangeOffsets[LocalIndex];
-  SourceRange Range(TranslateSourceLocation(*M, RawRange.getBegin()),
-                    TranslateSourceLocation(*M, RawRange.getEnd()));
+  SourceRange Range(ReadSourceLocation(*M, RawRange.getBegin()),
+                    ReadSourceLocation(*M, RawRange.getEnd()));
   assert(Range.isValid());
   return Range;
 }
@@ -6293,8 +6272,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
     return nullptr;
 
   // Read the record.
-  SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()),
-                    TranslateSourceLocation(M, PPOffs.getEnd()));
+  SourceRange Range(ReadSourceLocation(M, PPOffs.getBegin()),
+                    ReadSourceLocation(M, PPOffs.getEnd()));
   PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
   StringRef Blob;
   RecordData Record;
@@ -6406,7 +6385,7 @@ struct PPEntityComp {
   }
 
   SourceLocation getLoc(const PPEntityOffset &PPE) const {
-    return Reader.TranslateSourceLocation(M, PPE.getBegin());
+    return Reader.ReadSourceLocation(M, PPE.getBegin());
   }
 };
 
@@ -6450,7 +6429,7 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
       PPI = First;
       std::advance(PPI, Half);
       if (SourceMgr.isBeforeInTranslationUnit(
-              TranslateSourceLocation(M, PPI->getEnd()), Loc)) {
+              ReadSourceLocation(M, PPI->getEnd()), Loc)) {
         First = PPI;
         ++First;
         Count = Count - Half - 1;
@@ -6491,7 +6470,7 @@ std::optional<bool> ASTReader::isPreprocessedEntityInFileID(unsigned Index,
   unsigned LocalIndex = PPInfo.second;
   const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
 
-  SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin());
+  SourceLocation Loc = ReadSourceLocation(M, PPOffs.getBegin());
   if (Loc.isInvalid())
     return false;
 
@@ -8964,7 +8943,7 @@ MacroID ASTReader::getGlobalMacroID(ModuleFile &M, unsigned LocalID) {
 }
 
 serialization::SubmoduleID
-ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) {
+ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const {
   if (LocalID < NUM_PREDEF_SUBMODULE_IDS)
     return LocalID;
 
@@ -8997,7 +8976,7 @@ Module *ASTReader::getModule(unsigned ID) {
   return getSubmodule(ID);
 }
 
-ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) {
+ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) const {
   if (ID & 1) {
     // It's a module, look it up by submodule ID.
     auto I = GlobalSubmoduleMap.find(getGlobalSubmoduleID(M, ID >> 1));
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 744f11de88c2f8..40b543f2b72462 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -3249,7 +3249,7 @@ ASTReader::RecordLocation ASTReader::DeclCursorForID(GlobalDeclID ID,
   ModuleFile *M = I->second;
   const DeclOffset &DOffs =
       M->DeclOffsets[ID.get() - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
-  Loc = TranslateSourceLocation(*M, DOffs.getLocation());
+  Loc = ReadSourceLocation(*M, DOffs.getRawLoc());
   return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset));
 }
 
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 7db60c67d71234..4d85f6eb10d232 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2677,8 +2677,10 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,
 
     uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase;
     assert((Offset >> 32) == 0 && "Preprocessed entity offset too large");
-    PreprocessedEntityOffsets.push_back(
-        PPEntityOffset(getAdjustedRange((*E)->getSourceRange()), Offset));
+    SourceRange R = getAdjustedRange((*E)->getSourceRange());
+    PreprocessedEntityOffsets.emplace_back(
+        getRawSourceLocationEncoding(R.getBegin()),
+        getRawSourceLocationEncoding(R.getEnd()), Offset);
 
     if (auto *MD = dyn_cast<MacroDefinitionRecord>(*E)) {
       // Record this macro definition's ID.
@@ -2745,7 +2747,9 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,
     std::vector<PPSkippedRange> SerializedSkippedRanges;
     SerializedSkippedRanges.reserve(SkippedRanges.size());
     for (auto const& Range : SkippedRanges)
-      SerializedSkippedRanges.emplace_back(Range);
+      SerializedSkippedRanges.emplace_back(
+          getRawSourceLocationEncoding(Range.getBegin()),
+          getRawSourceLocationEncoding(Range.getEnd()));
 
     using namespace llvm;
     auto Abbrev = std::make_shared<BitCodeAbbrev>();
@@ -2911,8 +2915,8 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
       ParentID = SubmoduleIDs[Mod->Parent];
     }
 
-    uint64_t DefinitionLoc =
-        SourceLocationEncoding::encode(getAdjustedLocation(Mod->DefinitionLoc));
+    SourceLocationEncoding::RawLocEncoding DefinitionLoc =
+        getRawSourceLocationEncoding(getAdjustedLocation(Mod->DefinitionLoc));
 
     // Emit the definition of the block.
     {
@@ -5328,7 +5332,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
 
         // These values should be unique within a chain, since they will be read
         // as keys into ContinuousRangeMaps.
-        writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries);
         writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers);
         writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros);
         writeBaseIDOrNone(M.BasePreprocessedEntityID,
@@ -5821,10 +5824,34 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) {
   Record.push_back(getAdjustedFileID(FID).getOpaqueValue());
 }
 
+SourceLocationEncoding::RawLocEncoding
+ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) {
+  unsigned BaseOffset = 0;
+  unsigned ModuleFileIndex = 0;
+
+  // See SourceLocationEncoding.h for the encoding details.
+  if (Context->getSourceManager().isLoadedSourceLocation(Loc) &&
+      Loc.isValid()) {
+    assert(getChain());
+    auto SLocMapI = getChain()->GlobalSLocOffsetMap.find(
+        SourceManager::MaxLoadedOffset - Loc.getOffset() - 1);
+    assert(SLocMapI != getChain()->GlobalSLocOffsetMap.end() &&
+           "Corrupted global sloc offset map");
+    ModuleFile *F = SLocMapI->second;
+    BaseOffset = F->SLocEntryBaseOffset - 2;
+    // 0 means the location is not loaded. So we need to add 1 to the index to
+    // make it clear.
+    ModuleFileIndex = F->Index + 1;
+    assert(&getChain()->getModuleManager()[F->Index] == F);
+  }
+
+  return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex, Seq);
+}
+
 void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
                                   SourceLocationSequence *Seq) {
   Loc = getAdjustedLocation(Loc);
-  Record.push_back(SourceLocationEncoding::encode(Loc, Seq));
+  Record.push_back(getRawSourceLocationEncoding(Loc, Seq));
 }
 
 void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record,
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 0edc4feda3ef23..6201d284f0e025 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2809,14 +2809,16 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) {
 
   // Record the offset for this declaration
   SourceLocation Loc = D->getLocation();
+  SourceLocationEncoding::RawLocEncoding RawLoc =
+      getRawSourceLocationEncoding(getAdjustedLocation(Loc));
+
   unsigned Index = ID.get() - FirstDeclID.get();
   if (DeclOffsets.size() == Index)
-    DeclOffsets.emplace_back(getAdjustedLocation(Loc), Offset,
-                             DeclTypesBlockStartOffset);
+    DeclOffsets.emplace_back(RawLoc, Offset, DeclTypesBlockStartOffset);
   else if (DeclOffsets.size() < Index) {
     // FIXME: Can/should this happen?
     DeclOffsets.resize(Index+1);
-    DeclOffsets[Index].setLocation(getAdjustedLocation(Loc));
+    DeclOffsets[Index].setRawLoc(RawLoc);
     DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset);
   } else {
     llvm_unreachable("declarations should be emitted in ID order");
diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp
index db896fd361159d..2c42d33a8f5dd3 100644
--- a/clang/lib/Serialization/ModuleFile.cpp
+++ b/clang/lib/Serialization/ModuleFile.cpp
@@ -59,7 +59,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() {
   // Remapping tables.
   llvm::errs() << "  Base source location offset: " << SLocEntryBaseOffset
                << '\n';
-  dumpLocalRemap("Source location offset local -> global map", SLocRemap);
 
   llvm::errs() << "  Base identifier ID: " << BaseIdentifierID << '\n'
                << "  Number of identifiers: " << LocalNumIdentifiers << '\n';
diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm
new file mode 100644
index 00000000000000..b876fbb6d9f0cf
--- /dev/null
+++ b/clang/test/Modules/no-transitive-source-location-change.cppm
@@ -0,0 +1,69 @@
+// Testing that adding a new line in a module interface unit won't cause the BMI
+// of consuming module unit changes.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm
+//
+// The BMI may not be the same since the source location differs.
+// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null
+//
+// The BMI of B shouldn't change since all the locations remain the same.
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/B.v1.pcm
+// RUN: diff %t/B.v1.pcm %t/B.pcm  &> /dev/null
+//
+// The BMI of C may change since the locations for instantiations changes.
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/C.pcm
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/C.v1.pcm
+// RUN: not diff %t/C.v1.pcm %t/C.pcm  &> /dev/null
+
+//--- A.cppm
+export module A;
+export template <class T>
+struct C {
+    T func() {
+        return T(43);
+    }
+};
+export int funcA() {
+    return 43;
+}
+
+//--- A.v1.cppm
+export module A;
+
+export template <class T>
+struct C {
+    T func() {
+        return T(43);
+    }
+};
+export int funcA() {
+    return 43;
+}
+
+//--- B.cppm
+export module B;
+import A;
+
+export int funcB() {
+    return funcA();
+}
+
+//--- C.cppm
+export module C;
+import A;
+export void testD() {
+    C<int> c;
+    c.func();
+}
diff --git a/clang/test/Modules/pr61067.cppm b/clang/test/Modules/pr61067.cppm
index b7f9d22e253854..f853491fe76bf7 100644
--- a/clang/test/Modules/pr61067.cppm
+++ b/clang/test/Modules/pr61067.cppm
@@ -9,22 +9,6 @@
 // RUN:     -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm
 // RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \
 // RUN:     -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \
-// RUN:     -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp
-
-// Test again with reduced BMI
-// RUN: rm -rf %t
-// RUN: mkdir -p %t
-// RUN: split-file %s %t
-//
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/a.cppm \
-// RUN:     -emit-reduced-module-interface -o %t/a.pcm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.cppm \
-// RUN:     -emit-module-interface -fmodule-file=a=%t/a.pcm -o %t/b.pcm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/b.pcm -S \
-// RUN:     -emit-llvm -fmodule-file=a=%t/a.pcm -disable-llvm-passes -o - | FileCheck %t/b.cppm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/c.cpp -fmodule-file=a=%t/a.pcm \
-// RUN:     -S -emit-llvm -disable-llvm-passes -o - | FileCheck %t/c.cpp
 
 //--- a.cppm
 export module a;
@@ -43,12 +27,3 @@ void b() {
 }
 
 // CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_(
-
-//--- c.cpp
-import a;
-
-int c() {
-    (void)(a() == a());
-}
-
-// CHECK: define{{.*}}linkonce_odr{{.*}}@_ZW1aeqS_1aS0_(
diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
index 141da4c27f8d0b..c80a8fd0e52b17 100644
--- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
+++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
@@ -23,13 +23,14 @@ using LocSeq = SourceLocationSequence;
 // Loc is the raw (in-memory) form of SourceLocation.
 void roundTrip(SourceLocation::UIntTy Loc,
                std::optional<uint64_t> ExpectedEncoded = std::nullopt) {
-  uint64_t ActualEncoded =
-      SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc));
+  uint64_t ActualEncoded = SourceLocationEncoding::encode(
+      SourceLocation::getFromRawEncoding(Loc), /*BaseOffset=*/0,
+      /*BaseModuleFileIndex=*/0);
   if (ExpectedEncoded) {
     ASSERT_EQ(ActualEncoded, *ExpectedEncoded) << "Encoding " << Loc;
   }
   SourceLocation::UIntTy DecodedEncoded =
-      SourceLocationEncoding::decode(ActualEncoded).getRawEncoding();
+      SourceLocationEncoding::decode(ActualEncoded).first.getRawEncoding();
   ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded;
 }
 
@@ -41,7 +42,8 @@ void roundTrip(std::vector<SourceLocation::UIntTy> Locs,
     LocSeq::State Seq;
     for (auto L : Locs)
       ActualEncoded.push_back(SourceLocationEncoding::encode(
-          SourceLocation::getFromRawEncoding(L), Seq));
+          SourceLocation::getFromRawEncoding(L), /*BaseOffset=*/0,
+          /*BaseModuleFileIndex=*/0, Seq));
     if (!ExpectedEncoded.empty()) {
       ASSERT_EQ(ActualEncoded, ExpectedEncoded)
           << "Encoding " << testing::PrintToString(Locs);
@@ -51,7 +53,7 @@ void roundTrip(std::vector<SourceLocation::UIntTy> Locs,
   {
     LocSeq::State Seq;
     for (auto L : ActualEncoded) {
-      SourceLocation Loc = SourceLocationEncoding::decode(L, Seq);
+      SourceLocation Loc = SourceLocationEncoding::decode(L, Seq).first;
       DecodedEncoded.push_back(Loc.getRawEncoding());
     }
     ASSERT_EQ(DecodedEncoded, Locs)

From 2464c1c153285bbabf1df4a61f1975903ffbe70e Mon Sep 17 00:00:00 2001
From: Johannes de Fine Licht <johannes@musicmedia.dk>
Date: Tue, 30 Apr 2024 10:00:04 +0200
Subject: [PATCH 39/65] [MLIR] Sprinkle extra asserts in OperationSupport.h
 (#90465)

Should hopefully help shave some minutes off developer debugging time in
the future.
---
 mlir/include/mlir/IR/OperationSupport.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index cdb75a3777ad80..e661bb87a27ed0 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -1037,8 +1037,11 @@ struct OperationState {
     addAttribute(StringAttr::get(getContext(), name), attr);
   }
 
-  /// Add an attribute with the specified name.
+  /// Add an attribute with the specified name. `name` and `attr` must not be
+  /// null.
   void addAttribute(StringAttr name, Attribute attr) {
+    assert(name && "attribute name cannot be null");
+    assert(attr && "attribute cannot be null");
     attributes.append(name, attr);
   }
 
@@ -1047,7 +1050,11 @@ struct OperationState {
     attributes.append(newAttributes);
   }
 
-  void addSuccessors(Block *successor) { successors.push_back(successor); }
+  /// Adds a successor to the operation sate. `successor` must not be null.
+  void addSuccessors(Block *successor) {
+    assert(successor && "successor cannot be null");
+    successors.push_back(successor);
+  }
   void addSuccessors(BlockRange newSuccessors);
 
   /// Create a region that should be attached to the operation.  These regions

From 92ca6fcb87a1b8b0cef3b0a8c960b4d7d0fc12a0 Mon Sep 17 00:00:00 2001
From: Johannes de Fine Licht <johannes@musicmedia.dk>
Date: Tue, 30 Apr 2024 10:00:28 +0200
Subject: [PATCH 40/65] [MLIR][LLVM] Have LLVM::AddressOfOp implement
 ConstantLike (#90481)

For all means and purposes llvm.mlir.addressof acts like a constant, and
should be treated as such by passes. In particular, the operation should
be propagated rather than passed whenever possible.
---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  8 +--
 mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp    | 13 ++++-
 .../test/Dialect/LLVMIR/constant-folding.mlir | 50 +++++++++++++++++++
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index eedae4b9bb7c8e..6655ce6f123e14 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -63,12 +63,12 @@ class LLVM_IntArithmeticOpWithOverflowFlag<string mnemonic, string instName,
   let arguments = !con(commonArgs, iofArg);
 
   let builders = [
-    OpBuilder<(ins "Type":$type, "Value":$lhs, "Value":$rhs, 
+    OpBuilder<(ins "Type":$type, "Value":$lhs, "Value":$rhs,
                    "IntegerOverflowFlags":$overflowFlags), [{
       build($_builder, $_state, type, lhs, rhs);
       $_state.getOrAddProperties<Properties>().overflowFlags = overflowFlags;
     }]>,
-    OpBuilder<(ins "Value":$lhs, "Value":$rhs, 
+    OpBuilder<(ins "Value":$lhs, "Value":$rhs,
                    "IntegerOverflowFlags":$overflowFlags), [{
       build($_builder, $_state, lhs, rhs);
       $_state.getOrAddProperties<Properties>().overflowFlags = overflowFlags;
@@ -1052,7 +1052,7 @@ def LLVM_SwitchOp : LLVM_TerminatorOp<"switch",
 ////////////////////////////////////////////////////////////////////////////////
 
 def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof",
-    [Pure, DeclareOpInterfaceMethods<SymbolUserOpInterface>]> {
+    [Pure, ConstantLike, DeclareOpInterfaceMethods<SymbolUserOpInterface>]> {
   let arguments = (ins FlatSymbolRefAttr:$global_name);
   let results = (outs LLVM_AnyPointer:$res);
 
@@ -1114,6 +1114,8 @@ def LLVM_AddressOfOp : LLVM_Op<"mlir.addressof",
   }];
 
   let assemblyFormat = "$global_name attr-dict `:` qualified(type($res))";
+
+  let hasFolder = 1;
 }
 
 def LLVM_GlobalOp : LLVM_Op<"mlir.global",
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 4e06b9c127e76a..7be493d5992c4c 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -1785,7 +1785,7 @@ LogicalResult ReturnOp::verify() {
 }
 
 //===----------------------------------------------------------------------===//
-// Verifier for LLVM::AddressOfOp.
+// LLVM::AddressOfOp.
 //===----------------------------------------------------------------------===//
 
 static Operation *parentLLVMModule(Operation *op) {
@@ -1826,6 +1826,11 @@ AddressOfOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   return success();
 }
 
+// AddressOfOp constant-folds to the global symbol name.
+OpFoldResult LLVM::AddressOfOp::fold(FoldAdaptor) {
+  return getGlobalNameAttr();
+}
+
 //===----------------------------------------------------------------------===//
 // Verifier for LLVM::ComdatOp.
 //===----------------------------------------------------------------------===//
@@ -3258,6 +3263,12 @@ LogicalResult LLVMDialect::verifyRegionResultAttribute(Operation *op,
 
 Operation *LLVMDialect::materializeConstant(OpBuilder &builder, Attribute value,
                                             Type type, Location loc) {
+  // If this was folded from an llvm.mlir.addressof operation, it should be
+  // materialized as such.
+  if (auto symbol = dyn_cast<FlatSymbolRefAttr>(value))
+    if (isa<LLVM::LLVMPointerType>(type))
+      return builder.create<LLVM::AddressOfOp>(loc, type, symbol);
+  // Otherwise try materializing it as a regular llvm.mlir.constant op.
   return LLVM::ConstantOp::materialize(builder, value, type, loc);
 }
 
diff --git a/mlir/test/Dialect/LLVMIR/constant-folding.mlir b/mlir/test/Dialect/LLVMIR/constant-folding.mlir
index f800f2690467da..454126321eb970 100644
--- a/mlir/test/Dialect/LLVMIR/constant-folding.mlir
+++ b/mlir/test/Dialect/LLVMIR/constant-folding.mlir
@@ -51,3 +51,53 @@ llvm.func @or_basic() -> i32 {
   // CHECK: llvm.return %[[RES]] : i32
   llvm.return %2 : i32
 }
+
+// -----
+
+// CHECK-LABEL: llvm.func @addressof
+llvm.func @addressof() {
+  // CHECK-NEXT: %[[ADDRESSOF:.+]] = llvm.mlir.addressof @foo
+  %0 = llvm.mlir.addressof @foo : !llvm.ptr
+  %1 = llvm.mlir.addressof @foo : !llvm.ptr
+  // CHECK-NEXT: llvm.call @bar(%[[ADDRESSOF]], %[[ADDRESSOF]])
+  llvm.call @bar(%0, %1) : (!llvm.ptr, !llvm.ptr) -> ()
+  // CHECK-NEXT: llvm.return
+  llvm.return
+}
+
+llvm.mlir.global constant @foo() : i32
+
+llvm.func @bar(!llvm.ptr, !llvm.ptr)
+
+// -----
+
+// CHECK-LABEL: llvm.func @addressof_select
+llvm.func @addressof_select(%arg: i1) -> !llvm.ptr {
+  // CHECK-NEXT: %[[ADDRESSOF:.+]] = llvm.mlir.addressof @foo
+  %0 = llvm.mlir.addressof @foo : !llvm.ptr
+  %1 = llvm.mlir.addressof @foo : !llvm.ptr
+  %2 = arith.select %arg, %0, %1 : !llvm.ptr
+  // CHECK-NEXT: llvm.return %[[ADDRESSOF]]
+  llvm.return %2 : !llvm.ptr
+}
+
+llvm.mlir.global constant @foo() : i32
+
+llvm.func @bar(!llvm.ptr, !llvm.ptr)
+
+// -----
+
+// CHECK-LABEL: llvm.func @addressof_blocks
+llvm.func @addressof_blocks(%arg: i1) -> !llvm.ptr {
+  // CHECK-NEXT: %[[ADDRESSOF:.+]] = llvm.mlir.addressof @foo
+  llvm.cond_br %arg, ^bb1, ^bb2
+^bb1:
+  %0 = llvm.mlir.addressof @foo : !llvm.ptr
+  llvm.return %0 : !llvm.ptr
+^bb2:
+  %1 = llvm.mlir.addressof @foo : !llvm.ptr
+  // CHECK: return %[[ADDRESSOF]]
+  llvm.return %1 : !llvm.ptr
+}
+
+llvm.mlir.global constant @foo() : i32

From c9d92d215e6bfe14997bb79e6d6a050d1b449843 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= <andrzej.warzynski@arm.com>
Date: Tue, 30 Apr 2024 09:02:16 +0100
Subject: [PATCH 41/65] [mlir][test] Add TD example for peel+vectorize
 (depthwise conv) (#90200)

Adds an example that combines loop peeling and scalable vectorisation of
`linalg.depthwise_conv_2d_nhwc_hwc`. This is similar to
transform-op-peel-and-vectorize.mlir and is meant to demonstrate how to
avoid masking when vectorising using scalable vectors.
---
 .../transform-op-peel-and-vectorize-conv.mlir | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 mlir/test/Dialect/Linalg/transform-op-peel-and-vectorize-conv.mlir

diff --git a/mlir/test/Dialect/Linalg/transform-op-peel-and-vectorize-conv.mlir b/mlir/test/Dialect/Linalg/transform-op-peel-and-vectorize-conv.mlir
new file mode 100644
index 00000000000000..7f3997633a307d
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/transform-op-peel-and-vectorize-conv.mlir
@@ -0,0 +1,83 @@
+// RUN: mlir-opt %s --transform-interpreter --split-input-file -resolve-shaped-type-result-dims -canonicalize | FileCheck %s
+
+// Demonstrates what happens when peeling the 4th loop (that corresponds to the
+// "depth" dimension in depthwise convs) followed by vectorization in the
+// presence of _scalable_ vectors (these are introduced through scalable
+// tiling). The main goal is to verify that canonicalizations fold away the
+// masks in the main loop.
+
+func.func @conv(%arg0: tensor<1x1080x1962x48xi32>, %arg1: tensor<1x43x48xi32>) -> tensor<1x1080x1920x48xi32> {
+// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (-(48 mod s0) + 48)>
+
+// CHECK-LABEL:   func.func @conv(
+// CHECK-DAG:       %[[C_43:.*]] = arith.constant 43 : index
+// CHECK-DAG:       %[[C_48:.*]] = arith.constant 48 : index
+// CHECK-DAG:       %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[C4:.*]] = arith.constant 4 : index
+// CHECK:           %[[VSCALE:.*]] = vector.vscale
+// CHECK:           %[[VSCALE_X_4:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
+
+// Loop over the channel/depth dim - the main part after vectorisation (vectorized, no masking)
+// CHECK:               %[[UB_DEPTH_LOOP:.*]] = affine.apply #[[$MAP]](){{\[}}%[[VSCALE_X_4]]]
+// CHECK-NEXT:          %[[VAL_21:.*]] = scf.for {{.*}} to %[[UB_DEPTH_LOOP]] step %[[VSCALE_X_4]]
+// Loop over the Filter width dim
+// CHECK:                 scf.for %{{.*}} = %[[C0]] to %[[C_43]] step %[[C1]] {{.*}} -> (tensor<1x1x4x?xi32>) {
+// CHECK-NOT:               vector.mask
+// CHECK:                   vector.broadcast {{.*}} : vector<[4]xi32> to vector<1x4x[4]xi32>
+// CHECK-NEXT:              arith.muli {{.*}} : vector<1x4x[4]xi32>
+// CHECK-NEXT:              arith.addi {{.*}} : vector<1x4x[4]xi32>
+// CHECK-NOT:               vector.mask
+// CHECK:                   scf.yield {{.*}} : tensor<1x1x4x?xi32>
+// CHECK:                 }
+// CHECK:                 tensor.insert_slice {{.*}}  tensor<1x1x4x?xi32> into tensor<1x1080x1920x48xi32>
+// CHECK:                 scf.yield {{.*}} : tensor<1x1080x1920x48xi32>
+
+// CHECK-NEXT:          }
+
+// Loop over the channel/depth dim - the remainder part (not vectorized)
+// CHECK:               scf.for {{.*}} to %[[C_48]] step %[[VSCALE_X_4]]
+// Loop over the Filter width dim
+// CHECK:                 scf.for %{{.*}} = %[[C0]] to %[[C_43]] step %[[C1]] {{.*}} -> (tensor<1x1x4x?xi32>) {
+// CHECK:                   linalg.depthwise_conv_1d_nwc_wc {{.*}} -> tensor<1x4x?xi32>
+// CHECK:                   scf.yield %{{.*}} : tensor<1x1x4x?xi32>
+// CHECK:                 }
+// CHECK:                 tensor.insert_slice {{.*}} tensor<1x1x4x?xi32> into tensor<1x1080x1920x48xi32>
+// CHECK-NEXT:            scf.yield %{{.*}} : tensor<1x1080x1920x48xi32>
+// CHECK-NEXT:          }
+
+
+  %0 = tensor.empty() : tensor<1x1080x1920x48xi32>
+  %c0_i32 = arith.constant 0 : i32
+  %1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<1x1080x1920x48xi32>) -> tensor<1x1080x1920x48xi32>
+  %2 = linalg.depthwise_conv_2d_nhwc_hwc {
+    dilations = dense<1> : tensor<2xi64>,
+    strides = dense<1> : tensor<2xi64>}
+    ins(%arg0, %arg1 : tensor<1x1080x1962x48xi32>, tensor<1x43x48xi32>) outs(%1 : tensor<1x1080x1920x48xi32>) -> tensor<1x1080x1920x48xi32>
+  return %2 : tensor<1x1080x1920x48xi32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.consume}) {
+    // 1. Tile parallel dims
+    %1 = transform.structured.match ops{["linalg.depthwise_conv_2d_nhwc_hwc"]} in %root : (!transform.any_op) -> !transform.any_op
+    %tiled_linalg_op_0, %loops_1:4 = transform.structured.tile_using_for %1[1, 1, 4, [4], 0, 0] : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.op<"scf.for">, !transform.op<"scf.for">, !transform.op<"scf.for">)
+
+    // 2. Tile reduction dims
+    %2 = transform.structured.match ops{["linalg.depthwise_conv_2d_nhwc_hwc"]} in %loops_1#3 : (!transform.op<"scf.for">) -> !transform.any_op
+    %tiled_linalg_op_1, %loops_2:2 = transform.structured.tile_using_for %2[0, 0, 0, 0, 1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+
+    // 3. Decompose 2D conv into 2 x 1D conv
+    %3 = transform.structured.match ops{["linalg.depthwise_conv_2d_nhwc_hwc"]} in %loops_1#3 : (!transform.op<"scf.for">) -> !transform.any_op
+    %4 = transform.structured.decompose %3 : (!transform.any_op) -> !transform.any_op
+
+    // 4. Apply loop peeling - only the 4th loop
+    %main_loop, %remainder_loop = transform.loop.peel %loops_1#3 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">, !transform.op<"scf.for">)
+    %5 = transform.structured.match ops{["linalg.depthwise_conv_1d_nwc_wc"]} in %main_loop : (!transform.op<"scf.for">) -> !transform.any_op
+
+    // 5. Vectorize, but only the main loop
+    transform.structured.vectorize %5 vector_sizes [2, 4, [4], 16] : !transform.any_op
+
+    transform.yield
+  }
+}

From 74e65eec48ee87c34e06a09ad25a1029506dd60d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 30 Apr 2024 06:18:31 +0200
Subject: [PATCH 42/65] [clang][Interp] Handle Shifts in OpenCL correctly

We need to adjust the RHS to account for the LHS bitwidth.
---
 clang/lib/AST/Interp/Interp.h   | 14 ++++++++++++--
 clang/test/AST/Interp/opencl.cl |  3 +++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 9da0286deada17..2b650d684be9c4 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -1935,10 +1935,15 @@ template <PrimType NameL, PrimType NameR>
 inline bool Shr(InterpState &S, CodePtr OpPC) {
   using LT = typename PrimConv<NameL>::T;
   using RT = typename PrimConv<NameR>::T;
-  const auto &RHS = S.Stk.pop<RT>();
+  auto RHS = S.Stk.pop<RT>();
   const auto &LHS = S.Stk.pop<LT>();
   const unsigned Bits = LHS.bitWidth();
 
+  // OpenCL 6.3j: shift values are effectively % word size of LHS.
+  if (S.getLangOpts().OpenCL)
+    RT::bitAnd(RHS, RT::from(LHS.bitWidth() - 1, RHS.bitWidth()),
+               RHS.bitWidth(), &RHS);
+
   if (!CheckShift(S, OpPC, LHS, RHS, Bits))
     return false;
 
@@ -1960,10 +1965,15 @@ template <PrimType NameL, PrimType NameR>
 inline bool Shl(InterpState &S, CodePtr OpPC) {
   using LT = typename PrimConv<NameL>::T;
   using RT = typename PrimConv<NameR>::T;
-  const auto &RHS = S.Stk.pop<RT>();
+  auto RHS = S.Stk.pop<RT>();
   const auto &LHS = S.Stk.pop<LT>();
   const unsigned Bits = LHS.bitWidth();
 
+  // OpenCL 6.3j: shift values are effectively % word size of LHS.
+  if (S.getLangOpts().OpenCL)
+    RT::bitAnd(RHS, RT::from(LHS.bitWidth() - 1, RHS.bitWidth()),
+               RHS.bitWidth(), &RHS);
+
   if (!CheckShift(S, OpPC, LHS, RHS, Bits))
     return false;
 
diff --git a/clang/test/AST/Interp/opencl.cl b/clang/test/AST/Interp/opencl.cl
index b9ba4f8b9b555a..e7b9ec5caf2b1e 100644
--- a/clang/test/AST/Interp/opencl.cl
+++ b/clang/test/AST/Interp/opencl.cl
@@ -30,3 +30,6 @@ void foo(int3 arg1, int8 arg2) {
   int res12[vec_step(void) == 1 ? 1 : -1];
 }
 
+void negativeShift32(int a,int b) {
+  char array0[((int)1)<<40];
+}

From 29dda26c65fc50066792f558e95f9603a7d7effc Mon Sep 17 00:00:00 2001
From: Danial Klimkin <dklimkin@google.com>
Date: Tue, 30 Apr 2024 10:06:28 +0200
Subject: [PATCH 43/65] Fix lock guads in PipePosix.cpp (#90572)

Guard object destroyed immediately after creation without naming.
---
 lldb/source/Host/posix/PipePosix.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Host/posix/PipePosix.cpp b/lldb/source/Host/posix/PipePosix.cpp
index afd3fe39059ac1..f35c348990df62 100644
--- a/lldb/source/Host/posix/PipePosix.cpp
+++ b/lldb/source/Host/posix/PipePosix.cpp
@@ -108,7 +108,7 @@ Status PipePosix::CreateNew(bool child_processes_inherit) {
 }
 
 Status PipePosix::CreateNew(llvm::StringRef name, bool child_process_inherit) {
-  std::scoped_lock<std::mutex, std::mutex> (m_read_mutex, m_write_mutex);
+  std::scoped_lock<std::mutex, std::mutex> guard(m_read_mutex, m_write_mutex);
   if (CanReadUnlocked() || CanWriteUnlocked())
     return Status("Pipe is already opened");
 
@@ -146,7 +146,7 @@ Status PipePosix::CreateWithUniqueName(llvm::StringRef prefix,
 
 Status PipePosix::OpenAsReader(llvm::StringRef name,
                                bool child_process_inherit) {
-  std::scoped_lock<std::mutex, std::mutex> (m_read_mutex, m_write_mutex);
+  std::scoped_lock<std::mutex, std::mutex> guard(m_read_mutex, m_write_mutex);
 
   if (CanReadUnlocked() || CanWriteUnlocked())
     return Status("Pipe is already opened");

From eaee8aa0afe111f9291d54ecef97a3640a0f6ce0 Mon Sep 17 00:00:00 2001
From: Qizhi Hu <836744285@qq.com>
Date: Tue, 30 Apr 2024 16:09:09 +0800
Subject: [PATCH 44/65] [Clang][Sema] fix a bug on template partial
 specialization (#89862)

attempt to fix
https://github.com/llvm/llvm-project/issues/68885#issuecomment-1764201896
Deduction of NTTP whose type is `decltype(auto)` would create an
implicit cast expression to dependent type and makes the type of primary
template definition (`InjectedClassNameSpecialization`) and its partial
specialization different. Prevent emitting cast expression to make clang
knows their types are identical by removing `CTAK == CTAK_Deduced` when
the type is `decltype(auto)`.

Co-authored-by: huqizhi <836744285@qq.com>
---
 clang/docs/ReleaseNotes.rst                         |  1 +
 clang/lib/Sema/SemaTemplate.cpp                     |  2 +-
 ...dentical-type-primary-partial-specialization.cpp | 13 +++++++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/SemaCXX/identical-type-primary-partial-specialization.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c0fe5bcf6b122..98c80b6017f604 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -611,6 +611,7 @@ Bug Fixes to C++ Support
   immediate function context.
 - Fix CTAD for ``std::initializer_list``. This allows ``std::initializer_list{1, 2, 3}`` to be deduced as
   ``std::initializer_list<int>`` as intended.
+- Fix a bug on template partial specialization whose template parameter is `decltype(auto)`.
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index bbcb7c33a98579..ed5507c0ec0100 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -7706,7 +7706,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
     // FIXME: The language rules don't say what happens in this case.
     // FIXME: We get an opaque dependent type out of decltype(auto) if the
     // expression is merely instantiation-dependent; is this enough?
-    if (CTAK == CTAK_Deduced && Arg->isTypeDependent()) {
+    if (Arg->isTypeDependent()) {
       auto *AT = dyn_cast<AutoType>(DeducedT);
       if (AT && AT->isDecltypeAuto()) {
         SugaredConverted = TemplateArgument(Arg);
diff --git a/clang/test/SemaCXX/identical-type-primary-partial-specialization.cpp b/clang/test/SemaCXX/identical-type-primary-partial-specialization.cpp
new file mode 100644
index 00000000000000..ad51ca8252ef50
--- /dev/null
+++ b/clang/test/SemaCXX/identical-type-primary-partial-specialization.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++17 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s
+
+template <decltype(auto) a>
+struct S { // expected-note {{previous definition is here}}
+    static constexpr int i = 42;
+};
+
+template <decltype(auto) a>
+struct S<a> { // expected-error {{class template partial specialization does not specialize any template argument; to define the primary template, remove the template argument list}} \
+              // expected-error {{redefinition of 'S'}}
+    static constexpr int i = 0;
+};

From a413c563bdcaac08f7c325c7d69e19f924435e59 Mon Sep 17 00:00:00 2001
From: Qizhi Hu <836744285@qq.com>
Date: Tue, 30 Apr 2024 16:15:06 +0800
Subject: [PATCH 45/65] [Clang][Sema] Fix a bug on template partial
 specialization with issue on deduction of nontype template parameter (#90376)

Fix https://github.com/llvm/llvm-project/issues/68885
When build expression from a deduced argument whose kind is
`Declaration` and `NTTPType`(which declared as `decltype(auto)`) is
deduced as a reference type, `BuildExpressionFromDeclTemplateArgument`
just create a `DeclRef`. This is incorrect while we get type from the
expression since we can't get the original reference type from
`DeclRef`. Creating a `SubstNonTypeTemplateParmExpr` expression and make
the deduction correct. `Replacement` expression of
`SubstNonTypeTemplateParmExpr` just helps the deduction and may not be
same with the original expression.

Co-authored-by: huqizhi <836744285@qq.com>
---
 clang/docs/ReleaseNotes.rst              |  2 ++
 clang/include/clang/Sema/Sema.h          | 10 ++++++----
 clang/lib/Sema/SemaTemplate.cpp          | 19 +++++++++++++++----
 clang/lib/Sema/SemaTemplateDeduction.cpp | 10 ++++++----
 clang/test/SemaCXX/PR68885.cpp           | 21 +++++++++++++++++++++
 5 files changed, 50 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/SemaCXX/PR68885.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 98c80b6017f604..1abc00a25f1f42 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -612,6 +612,8 @@ Bug Fixes to C++ Support
 - Fix CTAD for ``std::initializer_list``. This allows ``std::initializer_list{1, 2, 3}`` to be deduced as
   ``std::initializer_list<int>`` as intended.
 - Fix a bug on template partial specialization whose template parameter is `decltype(auto)`.
+- Fix a bug on template partial specialization with issue on deduction of nontype template parameter
+  whose type is `decltype(auto)`. Fixes (#GH68885).
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 1ca523ec88c2f9..809b9c4498f697 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9249,7 +9249,8 @@ class Sema final : public SemaBase {
   void NoteTemplateParameterLocation(const NamedDecl &Decl);
 
   ExprResult BuildExpressionFromDeclTemplateArgument(
-      const TemplateArgument &Arg, QualType ParamType, SourceLocation Loc);
+      const TemplateArgument &Arg, QualType ParamType, SourceLocation Loc,
+      NamedDecl *TemplateParam = nullptr);
   ExprResult
   BuildExpressionFromNonTypeTemplateArgument(const TemplateArgument &Arg,
                                              SourceLocation Loc);
@@ -9572,9 +9573,10 @@ class Sema final : public SemaBase {
 
   bool isSameOrCompatibleFunctionType(QualType Param, QualType Arg);
 
-  TemplateArgumentLoc getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
-                                                    QualType NTTPType,
-                                                    SourceLocation Loc);
+  TemplateArgumentLoc
+  getTrivialTemplateArgumentLoc(const TemplateArgument &Arg, QualType NTTPType,
+                                SourceLocation Loc,
+                                NamedDecl *TemplateParam = nullptr);
 
   /// Get a template argument mapping the given template parameter to itself,
   /// e.g. for X in \c template<int X>, this would return an expression template
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index ed5507c0ec0100..3c2a5a4ac47e69 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -8438,10 +8438,9 @@ void Sema::NoteTemplateParameterLocation(const NamedDecl &Decl) {
 /// declaration and the type of its corresponding non-type template
 /// parameter, produce an expression that properly refers to that
 /// declaration.
-ExprResult
-Sema::BuildExpressionFromDeclTemplateArgument(const TemplateArgument &Arg,
-                                              QualType ParamType,
-                                              SourceLocation Loc) {
+ExprResult Sema::BuildExpressionFromDeclTemplateArgument(
+    const TemplateArgument &Arg, QualType ParamType, SourceLocation Loc,
+    NamedDecl *TemplateParam) {
   // C++ [temp.param]p8:
   //
   //   A non-type template-parameter of type "array of T" or
@@ -8508,6 +8507,18 @@ Sema::BuildExpressionFromDeclTemplateArgument(const TemplateArgument &Arg,
   } else {
     assert(ParamType->isReferenceType() &&
            "unexpected type for decl template argument");
+    if (NonTypeTemplateParmDecl *NTTP =
+            dyn_cast_if_present<NonTypeTemplateParmDecl>(TemplateParam)) {
+      QualType TemplateParamType = NTTP->getType();
+      const AutoType *AT = TemplateParamType->getAs<AutoType>();
+      if (AT && AT->isDecltypeAuto()) {
+        RefExpr = new (getASTContext()) SubstNonTypeTemplateParmExpr(
+            ParamType->getPointeeType(), RefExpr.get()->getValueKind(),
+            RefExpr.get()->getExprLoc(), RefExpr.get(), VD, NTTP->getIndex(),
+            /*PackIndex=*/std::nullopt,
+            /*RefParam=*/true);
+      }
+    }
   }
 
   // At this point we should have the right value category.
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index c3815bca038554..e93f7bd842e444 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -2639,7 +2639,8 @@ static bool isSameTemplateArg(ASTContext &Context,
 /// argument.
 TemplateArgumentLoc
 Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
-                                    QualType NTTPType, SourceLocation Loc) {
+                                    QualType NTTPType, SourceLocation Loc,
+                                    NamedDecl *TemplateParam) {
   switch (Arg.getKind()) {
   case TemplateArgument::Null:
     llvm_unreachable("Can't get a NULL template argument here");
@@ -2651,7 +2652,8 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg,
   case TemplateArgument::Declaration: {
     if (NTTPType.isNull())
       NTTPType = Arg.getParamTypeForDecl();
-    Expr *E = BuildExpressionFromDeclTemplateArgument(Arg, NTTPType, Loc)
+    Expr *E = BuildExpressionFromDeclTemplateArgument(Arg, NTTPType, Loc,
+                                                      TemplateParam)
                   .getAs<Expr>();
     return TemplateArgumentLoc(TemplateArgument(E), E);
   }
@@ -2718,8 +2720,8 @@ static bool ConvertDeducedTemplateArgument(
     // Convert the deduced template argument into a template
     // argument that we can check, almost as if the user had written
     // the template argument explicitly.
-    TemplateArgumentLoc ArgLoc =
-        S.getTrivialTemplateArgumentLoc(Arg, QualType(), Info.getLocation());
+    TemplateArgumentLoc ArgLoc = S.getTrivialTemplateArgumentLoc(
+        Arg, QualType(), Info.getLocation(), Param);
 
     // Check the template argument, converting it as necessary.
     return S.CheckTemplateArgument(
diff --git a/clang/test/SemaCXX/PR68885.cpp b/clang/test/SemaCXX/PR68885.cpp
new file mode 100644
index 00000000000000..4ff95771e2caf5
--- /dev/null
+++ b/clang/test/SemaCXX/PR68885.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s
+
+// expected-no-diagnostics
+
+template <decltype(auto) a>
+struct S {
+    static constexpr int i = 42;
+};
+
+template <decltype(auto) a> requires true
+struct S<a> {
+    static constexpr int i = 0;
+};
+
+static constexpr int a = 0;
+
+void test() {
+    static_assert(S<a>::i == 0);
+    static_assert(S<(a)>::i == 0);
+    static_assert(S<((a))>::i == 0);
+}

From 64248d7dee09fef4900058ba98a67feb68eb617c Mon Sep 17 00:00:00 2001
From: Daniil Kovalev <dkovalev@accesssoftek.com>
Date: Tue, 30 Apr 2024 11:15:35 +0300
Subject: [PATCH 46/65] [PAC][lldb][Dwarf] Support `__ptrauth`-qualified types
 in user expressions (#84387)

Depends on #84384 and #90329

This adds support for `DW_TAG_LLVM_ptrauth_type` entries corresponding
to explicitly signed types (e.g. free function pointers) in lldb user
expressions. Applies PR https://github.com/apple/llvm-project/pull/8239
from Apple's downstream and also adds tests and related code.

---------

Co-authored-by: Jonas Devlieghere <jonas@devlieghere.com>
---
 lldb/include/lldb/Symbol/CompilerType.h       |  12 ++
 lldb/include/lldb/Symbol/Type.h               |   4 +-
 lldb/include/lldb/Symbol/TypeSystem.h         |  11 ++
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  |  41 +++++-
 .../TypeSystem/Clang/TypeSystemClang.cpp      |  42 ++++++
 .../TypeSystem/Clang/TypeSystemClang.h        |  11 +-
 lldb/source/Symbol/CompilerType.cpp           |  28 ++++
 lldb/source/Symbol/Type.cpp                   |  18 ++-
 lldb/source/Symbol/TypeSystem.cpp             |   5 +
 .../DWARF/DWARFASTParserClangTests.cpp        | 135 ++++++++++++++++++
 10 files changed, 302 insertions(+), 5 deletions(-)

diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h
index 9e889a53086b2a..28c723abf27948 100644
--- a/lldb/include/lldb/Symbol/CompilerType.h
+++ b/lldb/include/lldb/Symbol/CompilerType.h
@@ -262,6 +262,12 @@ class CompilerType {
   size_t GetPointerByteSize() const;
   /// \}
 
+  unsigned GetPtrAuthKey() const;
+
+  unsigned GetPtrAuthDiscriminator() const;
+
+  bool GetPtrAuthAddressDiversity() const;
+
   /// Accessors.
   /// \{
 
@@ -369,6 +375,12 @@ class CompilerType {
 
   /// Create related types using the current type's AST
   CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) const;
+
+  /// Return a new CompilerType adds a ptrauth modifier from the given 32-bit
+  /// opaque payload to this type if this type is valid and the type system
+  /// supports ptrauth modifiers, else return an invalid type. Note that this
+  /// does not check if this type is a pointer.
+  CompilerType AddPtrAuthModifier(uint32_t payload) const;
   /// \}
 
   /// Exploring the type.
diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h
index b5eac5fa732d67..1c4f7b5601b0c3 100644
--- a/lldb/include/lldb/Symbol/Type.h
+++ b/lldb/include/lldb/Symbol/Type.h
@@ -401,7 +401,9 @@ class Type : public std::enable_shared_from_this<Type>, public UserID {
     /// This type is the type whose UID is m_encoding_uid as an atomic type.
     eEncodingIsAtomicUID,
     /// This type is the synthetic type whose UID is m_encoding_uid.
-    eEncodingIsSyntheticUID
+    eEncodingIsSyntheticUID,
+    /// This type is a signed pointer.
+    eEncodingIsLLVMPtrAuthUID
   };
 
   enum class ResolveState : unsigned char {
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index 0924e21a6b54a4..7bcb8d69387a02 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -221,6 +221,14 @@ class TypeSystem : public PluginInterface,
 
   virtual uint32_t GetPointerByteSize() = 0;
 
+  virtual unsigned GetPtrAuthKey(lldb::opaque_compiler_type_t type) = 0;
+
+  virtual unsigned
+  GetPtrAuthDiscriminator(lldb::opaque_compiler_type_t type) = 0;
+
+  virtual bool
+  GetPtrAuthAddressDiversity(lldb::opaque_compiler_type_t type) = 0;
+
   // Accessors
 
   virtual ConstString GetTypeName(lldb::opaque_compiler_type_t type,
@@ -285,6 +293,9 @@ class TypeSystem : public PluginInterface,
 
   virtual CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type);
 
+  virtual CompilerType AddPtrAuthModifier(lldb::opaque_compiler_type_t type,
+                                          uint32_t payload);
+
   /// \param opaque_payload      The m_payload field of Type, which may
   /// carry TypeSystem-specific extra information.
   virtual CompilerType CreateTypedef(lldb::opaque_compiler_type_t type,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 12dafd3f5d5d51..bea11e0e3840af 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -570,6 +570,39 @@ ExtractDataMemberLocation(DWARFDIE const &die, DWARFFormValue const &form_value,
   return memberOffset.ResolveValue(nullptr).UInt();
 }
 
+static TypePayloadClang GetPtrAuthMofidierPayload(const DWARFDIE &die) {
+  auto getAttr = [&](llvm::dwarf::Attribute Attr, unsigned defaultValue = 0) {
+    return die.GetAttributeValueAsUnsigned(Attr, defaultValue);
+  };
+  const unsigned key = getAttr(DW_AT_LLVM_ptrauth_key);
+  const bool addr_disc = getAttr(DW_AT_LLVM_ptrauth_address_discriminated);
+  const unsigned extra = getAttr(DW_AT_LLVM_ptrauth_extra_discriminator);
+  const bool isapointer = getAttr(DW_AT_LLVM_ptrauth_isa_pointer);
+  const bool authenticates_null_values =
+      getAttr(DW_AT_LLVM_ptrauth_authenticates_null_values);
+  const unsigned authentication_mode_int = getAttr(
+      DW_AT_LLVM_ptrauth_authentication_mode,
+      static_cast<unsigned>(clang::PointerAuthenticationMode::SignAndAuth));
+  clang::PointerAuthenticationMode authentication_mode =
+      clang::PointerAuthenticationMode::SignAndAuth;
+  if (authentication_mode_int >=
+          static_cast<unsigned>(clang::PointerAuthenticationMode::None) &&
+      authentication_mode_int <=
+          static_cast<unsigned>(
+              clang::PointerAuthenticationMode::SignAndAuth)) {
+    authentication_mode =
+        static_cast<clang::PointerAuthenticationMode>(authentication_mode_int);
+  } else {
+    die.GetDWARF()->GetObjectFile()->GetModule()->ReportError(
+        "[{0:x16}]: invalid pointer authentication mode method {1:x4}",
+        die.GetOffset(), authentication_mode_int);
+  }
+  auto ptr_auth = clang::PointerAuthQualifier::Create(
+      key, addr_disc, extra, authentication_mode, isapointer,
+      authenticates_null_values);
+  return TypePayloadClang(ptr_auth.getAsOpaqueValue());
+}
+
 lldb::TypeSP
 DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
                                        const DWARFDIE &die,
@@ -580,6 +613,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
   LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU());
   Type::ResolveState resolve_state = Type::ResolveState::Unresolved;
   Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID;
+  TypePayloadClang payload(GetOwningClangModule(die));
   TypeSP type_sp;
   CompilerType clang_type;
 
@@ -677,6 +711,10 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
   case DW_TAG_volatile_type:
     encoding_data_type = Type::eEncodingIsVolatileUID;
     break;
+  case DW_TAG_LLVM_ptrauth_type:
+    encoding_data_type = Type::eEncodingIsLLVMPtrAuthUID;
+    payload = GetPtrAuthMofidierPayload(die);
+    break;
   case DW_TAG_atomic_type:
     encoding_data_type = Type::eEncodingIsAtomicUID;
     break;
@@ -786,8 +824,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc,
 
   type_sp = dwarf->MakeType(die.GetID(), attrs.name, attrs.byte_size, nullptr,
                             attrs.type.Reference().GetID(), encoding_data_type,
-                            &attrs.decl, clang_type, resolve_state,
-                            TypePayloadClang(GetOwningClangModule(die)));
+                            &attrs.decl, clang_type, resolve_state, payload);
 
   dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
   return type_sp;
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index ec005ac46b34f3..3bdb288e97dd6a 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -2923,6 +2923,35 @@ bool TypeSystemClang::IsCStringType(lldb::opaque_compiler_type_t type,
   return false;
 }
 
+unsigned TypeSystemClang::GetPtrAuthKey(lldb::opaque_compiler_type_t type) {
+  if (type) {
+    clang::QualType qual_type(GetCanonicalQualType(type));
+    if (auto pointer_auth = qual_type.getPointerAuth())
+      return pointer_auth.getKey();
+  }
+  return 0;
+}
+
+unsigned
+TypeSystemClang::GetPtrAuthDiscriminator(lldb::opaque_compiler_type_t type) {
+  if (type) {
+    clang::QualType qual_type(GetCanonicalQualType(type));
+    if (auto pointer_auth = qual_type.getPointerAuth())
+      return pointer_auth.getExtraDiscriminator();
+  }
+  return 0;
+}
+
+bool TypeSystemClang::GetPtrAuthAddressDiversity(
+    lldb::opaque_compiler_type_t type) {
+  if (type) {
+    clang::QualType qual_type(GetCanonicalQualType(type));
+    if (auto pointer_auth = qual_type.getPointerAuth())
+      return pointer_auth.isAddressDiscriminated();
+  }
+  return false;
+}
+
 bool TypeSystemClang::IsFunctionType(lldb::opaque_compiler_type_t type) {
   auto isFunctionType = [&](clang::QualType qual_type) {
     return qual_type->isFunctionType();
@@ -4511,6 +4540,19 @@ TypeSystemClang::AddConstModifier(lldb::opaque_compiler_type_t type) {
   return CompilerType();
 }
 
+CompilerType
+TypeSystemClang::AddPtrAuthModifier(lldb::opaque_compiler_type_t type,
+                                    uint32_t payload) {
+  if (type) {
+    clang::ASTContext &clang_ast = getASTContext();
+    auto pauth = PointerAuthQualifier::fromOpaqueValue(payload);
+    clang::QualType result =
+        clang_ast.getPointerAuthType(GetQualType(type), pauth);
+    return GetType(result);
+  }
+  return CompilerType();
+}
+
 CompilerType
 TypeSystemClang::AddVolatileModifier(lldb::opaque_compiler_type_t type) {
   if (type) {
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index 8a7d45254bcf52..59ca69622d9e82 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -67,11 +67,13 @@ class OptionalClangModuleID {
 
 /// The implementation of lldb::Type's m_payload field for TypeSystemClang.
 class TypePayloadClang {
-  /// The Layout is as follows:
+  /// The payload is used for typedefs and ptrauth types.
+  /// For typedefs, the Layout is as follows:
   /// \verbatim
   /// bit 0..30 ... Owning Module ID.
   /// bit 31 ...... IsCompleteObjCClass.
   /// \endverbatim
+  /// For ptrauth types, we store the PointerAuthQualifier as an opaque value.
   Type::Payload m_payload = 0;
 
 public:
@@ -653,6 +655,10 @@ class TypeSystemClang : public TypeSystem {
   bool IsFloatingPointType(lldb::opaque_compiler_type_t type, uint32_t &count,
                            bool &is_complex) override;
 
+  unsigned GetPtrAuthKey(lldb::opaque_compiler_type_t type) override;
+  unsigned GetPtrAuthDiscriminator(lldb::opaque_compiler_type_t type) override;
+  bool GetPtrAuthAddressDiversity(lldb::opaque_compiler_type_t type) override;
+
   bool IsFunctionType(lldb::opaque_compiler_type_t type) override;
 
   uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
@@ -793,6 +799,9 @@ class TypeSystemClang : public TypeSystem {
 
   CompilerType AddConstModifier(lldb::opaque_compiler_type_t type) override;
 
+  CompilerType AddPtrAuthModifier(lldb::opaque_compiler_type_t type,
+                                  uint32_t payload) override;
+
   CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type) override;
 
   CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type) override;
diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp
index 9523fb6ea77c20..072dbccec44fba 100644
--- a/lldb/source/Symbol/CompilerType.cpp
+++ b/lldb/source/Symbol/CompilerType.cpp
@@ -108,6 +108,27 @@ bool CompilerType::IsConst() const {
   return false;
 }
 
+unsigned CompilerType::GetPtrAuthKey() const {
+  if (IsValid())
+    if (auto type_system_sp = GetTypeSystem())
+      return type_system_sp->GetPtrAuthKey(m_type);
+  return 0;
+}
+
+unsigned CompilerType::GetPtrAuthDiscriminator() const {
+  if (IsValid())
+    if (auto type_system_sp = GetTypeSystem())
+      return type_system_sp->GetPtrAuthDiscriminator(m_type);
+  return 0;
+}
+
+bool CompilerType::GetPtrAuthAddressDiversity() const {
+  if (IsValid())
+    if (auto type_system_sp = GetTypeSystem())
+      return type_system_sp->GetPtrAuthAddressDiversity(m_type);
+  return false;
+}
+
 bool CompilerType::IsFunctionType() const {
   if (IsValid())
     if (auto type_system_sp = GetTypeSystem())
@@ -664,6 +685,13 @@ CompilerType CompilerType::GetPointerType() const {
   return CompilerType();
 }
 
+CompilerType CompilerType::AddPtrAuthModifier(uint32_t payload) const {
+  if (IsValid())
+    if (auto type_system_sp = GetTypeSystem())
+      return type_system_sp->AddPtrAuthModifier(m_type, payload);
+  return CompilerType();
+}
+
 CompilerType CompilerType::GetLValueReferenceType() const {
   if (IsValid())
     if (auto type_system_sp = GetTypeSystem())
diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp
index dcfd1238f6d789..b85c38097ebe28 100644
--- a/lldb/source/Symbol/Type.cpp
+++ b/lldb/source/Symbol/Type.cpp
@@ -355,6 +355,9 @@ void Type::GetDescription(Stream *s, lldb::DescriptionLevel level,
     case eEncodingIsSyntheticUID:
       s->PutCString(" (synthetic type)");
       break;
+    case eEncodingIsLLVMPtrAuthUID:
+      s->PutCString(" (ptrauth type)");
+      break;
     }
   }
 }
@@ -416,6 +419,8 @@ void Type::Dump(Stream *s, bool show_context, lldb::DescriptionLevel level) {
     case eEncodingIsSyntheticUID:
       s->PutCString(" (synthetic type)");
       break;
+    case eEncodingIsLLVMPtrAuthUID:
+      s->PutCString(" (ptrauth type)");
     }
   }
 
@@ -477,7 +482,8 @@ std::optional<uint64_t> Type::GetByteSize(ExecutionContextScope *exe_scope) {
     // If we are a pointer or reference, then this is just a pointer size;
     case eEncodingIsPointerUID:
     case eEncodingIsLValueReferenceUID:
-    case eEncodingIsRValueReferenceUID: {
+    case eEncodingIsRValueReferenceUID:
+    case eEncodingIsLLVMPtrAuthUID: {
       if (ArchSpec arch = m_symbol_file->GetObjectFile()->GetArchitecture()) {
         m_byte_size = arch.GetAddressByteSize();
         m_byte_size_has_value = true;
@@ -621,6 +627,12 @@ bool Type::ResolveCompilerType(ResolveState compiler_type_resolve_state) {
             encoding_type->GetForwardCompilerType().GetRValueReferenceType();
         break;
 
+      case eEncodingIsLLVMPtrAuthUID:
+        m_compiler_type =
+            encoding_type->GetForwardCompilerType().AddPtrAuthModifier(
+                m_payload);
+        break;
+
       default:
         llvm_unreachable("Unhandled encoding_data_type.");
       }
@@ -676,6 +688,10 @@ bool Type::ResolveCompilerType(ResolveState compiler_type_resolve_state) {
           m_compiler_type = void_compiler_type.GetRValueReferenceType();
           break;
 
+        case eEncodingIsLLVMPtrAuthUID:
+          llvm_unreachable("Cannot handle eEncodingIsLLVMPtrAuthUID without "
+                           "valid encoding_type");
+
         default:
           llvm_unreachable("Unhandled encoding_data_type.");
         }
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 59b1b39e635ac5..3665771b188906 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -93,6 +93,11 @@ CompilerType TypeSystem::AddConstModifier(lldb::opaque_compiler_type_t type) {
   return CompilerType();
 }
 
+CompilerType TypeSystem::AddPtrAuthModifier(lldb::opaque_compiler_type_t type,
+                                            uint32_t payload) {
+  return CompilerType();
+}
+
 CompilerType
 TypeSystem::AddVolatileModifier(lldb::opaque_compiler_type_t type) {
   return CompilerType();
diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
index 0c2a0d735dc837..f7a4b8f1ac59be 100644
--- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp
@@ -287,6 +287,141 @@ TEST_F(DWARFASTParserClangTests, TestCallingConventionParsing) {
   ASSERT_EQ(found_function_types, expected_function_types);
 }
 
+TEST_F(DWARFASTParserClangTests, TestPtrAuthParsing) {
+  // Tests parsing values with type DW_TAG_LLVM_ptrauth_type corresponding to
+  // explicitly signed raw function pointers
+
+  // This is Dwarf for the following C code:
+  // ```
+  // void (*__ptrauth(0, 0, 42) a)();
+  // ```
+
+  const char *yamldata = R"(
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_AARCH64
+DWARF:
+  debug_str:
+    - a
+  debug_abbrev:
+    - ID:              0
+      Table:
+        - Code:            0x01
+          Tag:             DW_TAG_compile_unit
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_language
+              Form:            DW_FORM_data2
+        - Code:            0x02
+          Tag:             DW_TAG_variable
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref4
+            - Attribute:       DW_AT_external
+              Form:            DW_FORM_flag_present
+        - Code:            0x03
+          Tag:             DW_TAG_LLVM_ptrauth_type
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref4
+            - Attribute:       DW_AT_LLVM_ptrauth_key
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_LLVM_ptrauth_extra_discriminator
+              Form:            DW_FORM_data2
+        - Code:            0x04
+          Tag:             DW_TAG_pointer_type
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref4
+        - Code:            0x05
+          Tag:             DW_TAG_subroutine_type
+          Children:        DW_CHILDREN_yes
+        - Code:            0x06
+          Tag:             DW_TAG_unspecified_parameters
+          Children:        DW_CHILDREN_no
+
+  debug_info:
+    - Version:         5
+      UnitType:        DW_UT_compile
+      AddrSize:        8
+      Entries:
+# 0x0c: DW_TAG_compile_unit
+#         DW_AT_language [DW_FORM_data2]    (DW_LANG_C99)
+        - AbbrCode:        0x01
+          Values:
+            - Value:           0x0c
+
+# 0x0f:   DW_TAG_variable
+#           DW_AT_name [DW_FORM_strp]       (\"a\")
+#           DW_AT_type [DW_FORM_ref4]       (0x00000018 \"void (*__ptrauth(0, 0, 0x02a)\")
+#           DW_AT_external [DW_FORM_flag_present]   (true)
+        - AbbrCode:        0x02
+          Values:
+            - Value:           0x00
+            - Value:           0x18
+
+# 0x18:   DW_TAG_LLVM_ptrauth_type
+#           DW_AT_type [DW_FORM_ref4]       (0x00000020 \"void (*)(...)\")
+#           DW_AT_LLVM_ptrauth_key [DW_FORM_data1]  (0x00)
+#           DW_AT_LLVM_ptrauth_extra_discriminator [DW_FORM_data2]  (0x002a)
+        - AbbrCode:        0x03
+          Values:
+            - Value:           0x20
+            - Value:           0x00
+            - Value:           0x2a
+
+# 0x20:   DW_TAG_pointer_type
+#           DW_AT_type [DW_AT_type [DW_FORM_ref4]       (0x00000025 \"void (...)\")
+        - AbbrCode:        0x04
+          Values:
+            - Value:           0x25
+
+# 0x25:   DW_TAG_subroutine_type
+        - AbbrCode:        0x05
+
+# 0x26:     DW_TAG_unspecified_parameters
+        - AbbrCode:        0x06
+
+        - AbbrCode:        0x00 # end of child tags of 0x25
+        - AbbrCode:        0x00 # end of child tags of 0x0c
+...
+)";
+  YAMLModuleTester t(yamldata);
+
+  DWARFUnit *unit = t.GetDwarfUnit();
+  ASSERT_NE(unit, nullptr);
+  const DWARFDebugInfoEntry *cu_entry = unit->DIE().GetDIE();
+  ASSERT_EQ(cu_entry->Tag(), DW_TAG_compile_unit);
+  DWARFDIE cu_die(unit, cu_entry);
+
+  auto holder = std::make_unique<clang_utils::TypeSystemClangHolder>("ast");
+  auto &ast_ctx = *holder->GetAST();
+  DWARFASTParserClangStub ast_parser(ast_ctx);
+
+  DWARFDIE ptrauth_variable = cu_die.GetFirstChild();
+  ASSERT_EQ(ptrauth_variable.Tag(), DW_TAG_variable);
+  DWARFDIE ptrauth_type =
+      ptrauth_variable.GetAttributeValueAsReferenceDIE(DW_AT_type);
+  ASSERT_EQ(ptrauth_type.Tag(), DW_TAG_LLVM_ptrauth_type);
+
+  SymbolContext sc;
+  bool new_type = false;
+  lldb::TypeSP type_sp =
+      ast_parser.ParseTypeFromDWARF(sc, ptrauth_type, &new_type);
+  CompilerType compiler_type = type_sp->GetForwardCompilerType();
+  ASSERT_EQ(compiler_type.GetPtrAuthKey(), 0);
+  ASSERT_EQ(compiler_type.GetPtrAuthAddressDiversity(), false);
+  ASSERT_EQ(compiler_type.GetPtrAuthDiscriminator(), 42);
+}
+
 struct ExtractIntFromFormValueTest : public testing::Test {
   SubsystemRAII<FileSystem, HostInfo> subsystems;
   clang_utils::TypeSystemClangHolder holder;

From f78949a07e33017a798c410a102c95455685a9b1 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams@sony.com>
Date: Tue, 30 Apr 2024 09:13:16 +0100
Subject: [PATCH 47/65] [NFC][Clang] Add FIXME comment to the workaround for
 issue #89774

---
 clang/lib/CodeGen/CGDebugInfo.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 787db350487417..fac278f0e20a43 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1373,6 +1373,8 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
   SourceLocation Loc = AliasDecl->getLocation();
 
   if (CGM.getCodeGenOpts().DebugTemplateAlias &&
+      // FIXME: This is a workaround for the issue
+      //        https://github.com/llvm/llvm-project/issues/89774
       // The TemplateSpecializationType doesn't contain any instantiation
       // information; dependent template arguments can't be resolved. For now,
       // fall back to DW_TAG_typedefs for template aliases that are

From 7ac1fb01e9b70d09e6c4f39414bcd7c93787ef91 Mon Sep 17 00:00:00 2001
From: Christian Sigg <chsigg@users.noreply.github.com>
Date: Tue, 30 Apr 2024 10:33:05 +0200
Subject: [PATCH 48/65] [mlir] Mark `isa/dyn_cast/cast/...` member functions
 deprecated. (#90413)

This also removes the member overload in TypeSwitch.

All other users have been removed in
fac349a169976f822fb27f03e623fa0d28aec1f3 and
bd9fdce69b4c4cdb572e715c5f453aaf9b77b83a.
---
 llvm/include/llvm/ADT/TypeSwitch.h       | 24 ++------------------
 mlir/include/mlir/IR/Attributes.h        |  5 +++++
 mlir/include/mlir/IR/Location.h          |  3 +++
 mlir/include/mlir/IR/Types.h             |  5 +++++
 mlir/include/mlir/Tools/PDLL/AST/Types.h | 28 ++++++++++++++++++++++++
 5 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h
index 10a2d48e918db9..5bbbdf23b257ed 100644
--- a/llvm/include/llvm/ADT/TypeSwitch.h
+++ b/llvm/include/llvm/ADT/TypeSwitch.h
@@ -61,29 +61,9 @@ template <typename DerivedT, typename T> class TypeSwitchBase {
   }
 
 protected:
-  /// Trait to check whether `ValueT` provides a 'dyn_cast' method with type
-  /// `CastT`.
-  template <typename ValueT, typename CastT>
-  using has_dyn_cast_t =
-      decltype(std::declval<ValueT &>().template dyn_cast<CastT>());
-
-  /// Attempt to dyn_cast the given `value` to `CastT`. This overload is
-  /// selected if `value` already has a suitable dyn_cast method.
+  /// Attempt to dyn_cast the given `value` to `CastT`.
   template <typename CastT, typename ValueT>
-  static decltype(auto) castValue(
-      ValueT &&value,
-      std::enable_if_t<is_detected<has_dyn_cast_t, ValueT, CastT>::value> * =
-          nullptr) {
-    return value.template dyn_cast<CastT>();
-  }
-
-  /// Attempt to dyn_cast the given `value` to `CastT`. This overload is
-  /// selected if llvm::dyn_cast should be used.
-  template <typename CastT, typename ValueT>
-  static decltype(auto) castValue(
-      ValueT &&value,
-      std::enable_if_t<!is_detected<has_dyn_cast_t, ValueT, CastT>::value> * =
-          nullptr) {
+  static decltype(auto) castValue(ValueT &&value) {
     return dyn_cast<CastT>(value);
   }
 
diff --git a/mlir/include/mlir/IR/Attributes.h b/mlir/include/mlir/IR/Attributes.h
index cc0cee6a31183c..8a077865b51b5f 100644
--- a/mlir/include/mlir/IR/Attributes.h
+++ b/mlir/include/mlir/IR/Attributes.h
@@ -50,14 +50,19 @@ class Attribute {
   /// Casting utility functions. These are deprecated and will be removed,
   /// please prefer using the `llvm` namespace variants instead.
   template <typename... Tys>
+  [[deprecated("Use mlir::isa<U>() instead")]]
   bool isa() const;
   template <typename... Tys>
+  [[deprecated("Use mlir::isa_and_nonnull<U>() instead")]]
   bool isa_and_nonnull() const;
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast<U>() instead")]]
   U dyn_cast() const;
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast_or_null<U>() instead")]]
   U dyn_cast_or_null() const;
   template <typename U>
+  [[deprecated("Use mlir::cast<U>() instead")]]
   U cast() const;
 
   /// Return a unique identifier for the concrete attribute type. This is used
diff --git a/mlir/include/mlir/IR/Location.h b/mlir/include/mlir/IR/Location.h
index aa8314f38cdfac..423b4d19b5b944 100644
--- a/mlir/include/mlir/IR/Location.h
+++ b/mlir/include/mlir/IR/Location.h
@@ -78,14 +78,17 @@ class Location {
 
   /// Type casting utilities on the underlying location.
   template <typename U>
+  [[deprecated("Use mlir::isa<U>() instead")]]
   bool isa() const {
     return llvm::isa<U>(*this);
   }
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast<U>() instead")]]
   U dyn_cast() const {
     return llvm::dyn_cast<U>(*this);
   }
   template <typename U>
+  [[deprecated("Use mlir::cast<U>() instead")]]
   U cast() const {
     return llvm::cast<U>(*this);
   }
diff --git a/mlir/include/mlir/IR/Types.h b/mlir/include/mlir/IR/Types.h
index a89e13b625bf40..65824531fdc908 100644
--- a/mlir/include/mlir/IR/Types.h
+++ b/mlir/include/mlir/IR/Types.h
@@ -97,14 +97,19 @@ class Type {
   bool operator!() const { return impl == nullptr; }
 
   template <typename... Tys>
+  [[deprecated("Use mlir::isa<U>() instead")]]
   bool isa() const;
   template <typename... Tys>
+  [[deprecated("Use mlir::isa_and_nonnull<U>() instead")]]
   bool isa_and_nonnull() const;
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast<U>() instead")]]
   U dyn_cast() const;
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast_or_null<U>() instead")]]
   U dyn_cast_or_null() const;
   template <typename U>
+  [[deprecated("Use mlir::cast<U>() instead")]]
   U cast() const;
 
   /// Return a unique identifier for the concrete type. This is used to support
diff --git a/mlir/include/mlir/Tools/PDLL/AST/Types.h b/mlir/include/mlir/Tools/PDLL/AST/Types.h
index 03252e9f6620c8..89c8e193ddc32b 100644
--- a/mlir/include/mlir/Tools/PDLL/AST/Types.h
+++ b/mlir/include/mlir/Tools/PDLL/AST/Types.h
@@ -64,23 +64,28 @@ class Type {
 
   /// Provide type casting support.
   template <typename U>
+  [[deprecated("Use mlir::isa<U>() instead")]]
   bool isa() const {
     assert(impl && "isa<> used on a null type.");
     return U::classof(*this);
   }
   template <typename U, typename V, typename... Others>
+  [[deprecated("Use mlir::isa<U>() instead")]]
   bool isa() const {
     return isa<U>() || isa<V, Others...>();
   }
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast<U>() instead")]]
   U dyn_cast() const {
     return isa<U>() ? U(impl) : U(nullptr);
   }
   template <typename U>
+  [[deprecated("Use mlir::dyn_cast_or_null<U>() instead")]]
   U dyn_cast_or_null() const {
     return (impl && isa<U>()) ? U(impl) : U(nullptr);
   }
   template <typename U>
+  [[deprecated("Use mlir::cast<U>() instead")]]
   U cast() const {
     assert(isa<U>());
     return U(impl);
@@ -323,6 +328,29 @@ struct DenseMapInfo<mlir::pdll::ast::Type> {
     return lhs == rhs;
   }
 };
+
+/// Add support for llvm style casts.
+/// We provide a cast between To and From if From is mlir::pdll::ast::Type or
+/// derives from it
+template <typename To, typename From>
+struct CastInfo<
+    To, From,
+    std::enable_if_t<
+        std::is_same_v<mlir::pdll::ast::Type, std::remove_const_t<From>> ||
+        std::is_base_of_v<mlir::pdll::ast::Type, From>>>
+    : NullableValueCastFailed<To>,
+      DefaultDoCastIfPossible<To, From, CastInfo<To, From>> {
+  static inline bool isPossible(mlir::pdll::ast::Type ty) {
+    /// Return a constant true instead of a dynamic true when casting to self or
+    /// up the hierarchy.
+    if constexpr (std::is_base_of_v<To, From>) {
+      return true;
+    } else {
+      return To::classof(ty);
+    };
+  }
+  static inline To doCast(mlir::pdll::ast::Type ty) { return To(ty.getImpl()); }
+};
 } // namespace llvm
 
 #endif // MLIR_TOOLS_PDLL_AST_TYPES_H_

From b2b463bd8f6b21f040b80c4493682cf74f8dced5 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Thu, 18 Apr 2024 16:54:22 +0800
Subject: [PATCH 49/65] [C++20] [Modules] Add signature to the BMI recording
 export imported modules

After https://github.com/llvm/llvm-project/pull/86912,
for the following example,

```
export module A;
export import B;
```

The generated BMI of `A` won't change if the source location in `A`
changes. Further, we plan avoid more such changes.

However, it is slightly problematic since `export import` should
propagate all the changes.

So this patch adds a signature to the BMI of C++20 modules so that we
can propagate the changes correctly.
---
 clang/include/clang/Serialization/ASTWriter.h |  1 +
 clang/lib/Serialization/ASTWriter.cpp         | 72 +++++++++++++------
 .../Modules/force-transitive-changes.cppm     | 38 ++++++++++
 .../no-transitive-source-location-change.cppm | 29 +++++++-
 4 files changed, 114 insertions(+), 26 deletions(-)
 create mode 100644 clang/test/Modules/force-transitive-changes.cppm

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 428bf6a5a791b3..921678d278d6e2 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -525,6 +525,7 @@ class ASTWriter : public ASTDeserializationListener,
 
   /// Calculate hash of the pcm content.
   std::pair<ASTFileSignature, ASTFileSignature> createSignature() const;
+  ASTFileSignature createSignatureForNamedModule() const;
 
   void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts);
   void WriteSourceManagerBlock(SourceManager &SourceMgr,
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 4d85f6eb10d232..c3fcd1a4df2368 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1174,26 +1174,47 @@ ASTWriter::createSignature() const {
   return std::make_pair(ASTBlockHash, Signature);
 }
 
+ASTFileSignature ASTWriter::createSignatureForNamedModule() const {
+  llvm::SHA1 Hasher;
+  Hasher.update(StringRef(Buffer.data(), Buffer.size()));
+
+  assert(WritingModule);
+  assert(WritingModule->isNamedModule());
+
+  // We need to combine all the export imported modules no matter
+  // we used it or not.
+  for (auto [ExportImported, _] : WritingModule->Exports)
+    Hasher.update(ExportImported->Signature);
+
+  return ASTFileSignature::create(Hasher.result());
+}
+
+static void BackpatchSignatureAt(llvm::BitstreamWriter &Stream,
+                                 const ASTFileSignature &S, uint64_t BitNo) {
+  for (uint8_t Byte : S) {
+    Stream.BackpatchByte(BitNo, Byte);
+    BitNo += 8;
+  }
+}
+
 ASTFileSignature ASTWriter::backpatchSignature() {
+  if (isWritingStdCXXNamedModules()) {
+    ASTFileSignature Signature = createSignatureForNamedModule();
+    BackpatchSignatureAt(Stream, Signature, SignatureOffset);
+    return Signature;
+  }
+
   if (!WritingModule ||
       !PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)
     return {};
 
   // For implicit modules, write the hash of the PCM as its signature.
-
-  auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) {
-    for (uint8_t Byte : S) {
-      Stream.BackpatchByte(BitNo, Byte);
-      BitNo += 8;
-    }
-  };
-
   ASTFileSignature ASTBlockHash;
   ASTFileSignature Signature;
   std::tie(ASTBlockHash, Signature) = createSignature();
 
-  BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset);
-  BackpatchSignatureAt(Signature, SignatureOffset);
+  BackpatchSignatureAt(Stream, ASTBlockHash, ASTBlockHashOffset);
+  BackpatchSignatureAt(Stream, Signature, SignatureOffset);
 
   return Signature;
 }
@@ -1210,9 +1231,11 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
   RecordData Record;
   Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5);
 
-  // For implicit modules, write the hash of the PCM as its signature.
-  if (WritingModule &&
-      PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
+  // For implicit modules and C++20 named modules, write the hash of the PCM as
+  // its signature.
+  if (isWritingStdCXXNamedModules() ||
+      (WritingModule &&
+       PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent)) {
     // At this point, we don't know the actual signature of the file or the AST
     // block - we're only able to compute those at the end of the serialization
     // process. Let's store dummy signatures for now, and replace them with the
@@ -1223,21 +1246,24 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
     auto Dummy = ASTFileSignature::createDummy();
     SmallString<128> Blob{Dummy.begin(), Dummy.end()};
 
-    auto Abbrev = std::make_shared<BitCodeAbbrev>();
-    Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
-    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
-    unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+    // We don't need AST Block hash in named modules.
+    if (!isWritingStdCXXNamedModules()) {
+      auto Abbrev = std::make_shared<BitCodeAbbrev>();
+      Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH));
+      Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+      unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
 
-    Abbrev = std::make_shared<BitCodeAbbrev>();
+      Record.push_back(AST_BLOCK_HASH);
+      Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
+      ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
+      Record.clear();
+    }
+
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
     Abbrev->Add(BitCodeAbbrevOp(SIGNATURE));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
 
-    Record.push_back(AST_BLOCK_HASH);
-    Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob);
-    ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
-    Record.clear();
-
     Record.push_back(SIGNATURE);
     Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob);
     SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8;
diff --git a/clang/test/Modules/force-transitive-changes.cppm b/clang/test/Modules/force-transitive-changes.cppm
new file mode 100644
index 00000000000000..5732d264d1d552
--- /dev/null
+++ b/clang/test/Modules/force-transitive-changes.cppm
@@ -0,0 +1,38 @@
+// Test that the changes from export imported modules and touched
+// modules can be popullated as expected.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o %t/A.v1.pcm
+
+// The BMI of B should change it export imports A, so all the change to A should be popullated
+// to B.
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/B.v1.pcm
+// RUN: not diff %t/B.v1.pcm %t/B.pcm  &> /dev/null
+
+//--- A.cppm
+export module A;
+export int funcA() {
+    return 43;
+}
+
+//--- A.v1.cppm
+export module A;
+
+export int funcA() {
+    return 43;
+}
+
+//--- B.cppm
+export module B;
+export import A;
+
+export int funcB() {
+    return funcA();
+}
diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm
index b876fbb6d9f0cf..83cf6fb4f684d0 100644
--- a/clang/test/Modules/no-transitive-source-location-change.cppm
+++ b/clang/test/Modules/no-transitive-source-location-change.cppm
@@ -6,8 +6,6 @@
 // RUN: cd %t
 //
 // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
-
-//
 // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm
 //
 // The BMI may not be the same since the source location differs.
@@ -26,6 +24,31 @@
 // RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -fmodule-file=A=%t/A.v1.pcm \
 // RUN:     -o %t/C.v1.pcm
 // RUN: not diff %t/C.v1.pcm %t/C.pcm  &> /dev/null
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// Test again with reduced BMI.
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o %t/A.v1.pcm
+//
+// The BMI may not be the same since the source location differs.
+// RUN: not diff %t/A.pcm %t/A.v1.pcm &> /dev/null
+//
+// The BMI of B shouldn't change since all the locations remain the same.
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/B.v1.pcm
+// RUN: diff %t/B.v1.pcm %t/B.pcm  &> /dev/null
+//
+// The BMI of C may change since the locations for instantiations changes.
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.pcm \
+// RUN:     -o %t/C.pcm
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-reduced-module-interface -fmodule-file=A=%t/A.v1.pcm \
+// RUN:     -o %t/C.v1.pcm
+// RUN: not diff %t/C.v1.pcm %t/C.pcm  &> /dev/null
 
 //--- A.cppm
 export module A;
@@ -63,7 +86,7 @@ export int funcB() {
 //--- C.cppm
 export module C;
 import A;
-export void testD() {
+export inline void testD() {
     C<int> c;
     c.func();
 }

From fce0916969218fdb4b89ad0b3e18599204d4138d Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 16:37:27 +0800
Subject: [PATCH 50/65] =?UTF-8?q?[NFC]=20[C++20]=20[Modules]=20Use=20new?=
 =?UTF-8?q?=20class=20CXX20ModulesGenerator=20to=20genera=E2=80=A6=20(#905?=
 =?UTF-8?q?70)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…te module file for C++20 modules instead of PCHGenerator

Previously we're re-using PCHGenerator to generate the module file for
C++20 modules. But this is slighty more or less odd. This patch tries to
use a new class 'CXX20ModulesGenerator' to generate the module file for
C++20 modules.
---
 clang/include/clang/Serialization/ASTWriter.h | 25 ++++++++++++++++---
 clang/lib/Frontend/FrontendActions.cpp        | 11 +++-----
 clang/lib/Serialization/GeneratePCH.cpp       | 25 +++++++++++--------
 clang/test/Modules/pr67893.cppm               |  2 +-
 clang/test/Modules/search-partitions.cpp      |  8 +++---
 5 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 921678d278d6e2..cfcd787530d0e9 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -890,6 +890,8 @@ class ASTWriter : public ASTDeserializationListener,
 /// AST and semantic-analysis consumer that generates a
 /// precompiled header from the parsed source code.
 class PCHGenerator : public SemaConsumer {
+  void anchor() override;
+
   Preprocessor &PP;
   std::string OutputFile;
   std::string isysroot;
@@ -933,17 +935,34 @@ class PCHGenerator : public SemaConsumer {
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
 };
 
-class ReducedBMIGenerator : public PCHGenerator {
+class CXX20ModulesGenerator : public PCHGenerator {
+  void anchor() override;
+
 protected:
   virtual Module *getEmittingModule(ASTContext &Ctx) override;
 
+  CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                        StringRef OutputFile, bool GeneratingReducedBMI);
+
 public:
-  ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
-                      StringRef OutputFile);
+  CXX20ModulesGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                        StringRef OutputFile)
+      : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+                              /*GeneratingReducedBMI=*/false) {}
 
   void HandleTranslationUnit(ASTContext &Ctx) override;
 };
 
+class ReducedBMIGenerator : public CXX20ModulesGenerator {
+  void anchor() override;
+
+public:
+  ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache,
+                      StringRef OutputFile)
+      : CXX20ModulesGenerator(PP, ModuleCache, OutputFile,
+                              /*GeneratingReducedBMI=*/true) {}
+};
+
 /// If we can elide the definition of \param D in reduced BMI.
 ///
 /// Generally, we can elide the definition of a declaration if it won't affect
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 480dfa8c975933..454653a31534cd 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -272,13 +272,10 @@ bool GenerateModuleInterfaceAction::BeginSourceFileAction(
 std::unique_ptr<ASTConsumer>
 GenerateModuleInterfaceAction::CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) {
-  CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
-  CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
-
-  std::vector<std::unique_ptr<ASTConsumer>> Consumers =
-      CreateMultiplexConsumer(CI, InFile);
-  if (Consumers.empty())
-    return nullptr;
+  std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+  Consumers.push_back(std::make_unique<CXX20ModulesGenerator>(
+      CI.getPreprocessor(), CI.getModuleCache(),
+      CI.getFrontendOpts().OutputFile));
 
   if (CI.getFrontendOpts().GenReducedBMI &&
       !CI.getFrontendOpts().ModuleOutputPath.empty()) {
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index a2ddbe4624aae4..cc06106a47708e 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -88,31 +88,30 @@ ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() {
   return &Writer;
 }
 
-ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor &PP,
-                                         InMemoryModuleCache &ModuleCache,
-                                         StringRef OutputFile)
+void PCHGenerator::anchor() {}
+
+CXX20ModulesGenerator::CXX20ModulesGenerator(Preprocessor &PP,
+                                             InMemoryModuleCache &ModuleCache,
+                                             StringRef OutputFile,
+                                             bool GeneratingReducedBMI)
     : PCHGenerator(
           PP, ModuleCache, OutputFile, llvm::StringRef(),
           std::make_shared<PCHBuffer>(),
           /*Extensions=*/ArrayRef<std::shared_ptr<ModuleFileExtension>>(),
           /*AllowASTWithErrors*/ false, /*IncludeTimestamps=*/false,
           /*BuildingImplicitModule=*/false, /*ShouldCacheASTInMemory=*/false,
-          /*GeneratingReducedBMI=*/true) {}
+          GeneratingReducedBMI) {}
 
-Module *ReducedBMIGenerator::getEmittingModule(ASTContext &Ctx) {
+Module *CXX20ModulesGenerator::getEmittingModule(ASTContext &Ctx) {
   Module *M = Ctx.getCurrentNamedModule();
   assert(M && M->isNamedModuleUnit() &&
-         "ReducedBMIGenerator should only be used with C++20 Named modules.");
+         "CXX20ModulesGenerator should only be used with C++20 Named modules.");
   return M;
 }
 
-void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) {
-  // We need to do this to make sure the size of reduced BMI not to be larger
-  // than full BMI.
-  //
+void CXX20ModulesGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   // FIMXE: We'd better to wrap such options to a new class ASTWriterOptions
   // since this is not about searching header really.
-  // FIXME2: We'd better to move the class writing full BMI with reduced BMI.
   HeaderSearchOptions &HSOpts =
       getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts();
   HSOpts.ModulesSkipDiagnosticOptions = true;
@@ -134,3 +133,7 @@ void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) {
   *OS << getBufferPtr()->Data;
   OS->flush();
 }
+
+void CXX20ModulesGenerator::anchor() {}
+
+void ReducedBMIGenerator::anchor() {}
diff --git a/clang/test/Modules/pr67893.cppm b/clang/test/Modules/pr67893.cppm
index 58990cec01d666..95479193f8ea2b 100644
--- a/clang/test/Modules/pr67893.cppm
+++ b/clang/test/Modules/pr67893.cppm
@@ -5,7 +5,7 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm \
 // RUN:      -emit-module-interface -o %t/a.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.cppm \
-// RUN:      -emit-module-interface -fprebuilt-module-path=%t
+// RUN:      -emit-module-interface -fprebuilt-module-path=%t -o %t/m.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/m.pcm  \
 // RUN:      -fprebuilt-module-path=%t -S -emit-llvm -o - | FileCheck %t/m.cppm
 
diff --git a/clang/test/Modules/search-partitions.cpp b/clang/test/Modules/search-partitions.cpp
index 92732958db94e6..92f7c637c83384 100644
--- a/clang/test/Modules/search-partitions.cpp
+++ b/clang/test/Modules/search-partitions.cpp
@@ -11,7 +11,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/partition3.cpp \
 // RUN:  -o %t/A-Part3.pcm
 
-// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/moduleA.cpp \
+// RUN: %clang_cc1 -std=c++20 %t/moduleA.cpp -fsyntax-only -verify \
 // RUN:  -fprebuilt-module-path=%t
 
 // Test again with reduced BMI
@@ -28,9 +28,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/partition3.cpp \
 // RUN:  -o %t/A-Part3.pcm
 
-// RUN: %clang_cc1 -std=c++20 -fsyntax-only %t/moduleA.cpp -fprebuilt-module-path=%t 
-
-// expected-no-diagnostics
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %t/moduleA.cpp -fprebuilt-module-path=%t 
 
 //--- partition1.cpp
 export module A:Part1;
@@ -50,7 +48,7 @@ export module A:Part3;
 int part3();
 
 //--- moduleA.cpp
-
+// expected-no-diagnostics
 export module A;
 
 import :Part1;

From 21f8cedc4aa542f628035cd5dafd2d2529eb4397 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Tue, 30 Apr 2024 09:00:37 +0000
Subject: [PATCH 51/65] [flang] Fix debug-fn-info.f90 test

91a8cb781dbc981356207e0c3608d92ed6d26042 was originally written
before 8d5386669ed63548daf1bee415596582d6d78d7d landed. The latter
changed how main is emitted which changed the numbering of the
suprograms in the test output.

To fix this I've added a check for the new _QQmain and renumbered
the existing checks.
---
 flang/test/Transforms/debug-fn-info.f90 | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/flang/test/Transforms/debug-fn-info.f90 b/flang/test/Transforms/debug-fn-info.f90
index c1a817312c959c..97a34e8676de7f 100644
--- a/flang/test/Transforms/debug-fn-info.f90
+++ b/flang/test/Transforms/debug-fn-info.f90
@@ -7,9 +7,11 @@
 ! CHECK-DAG: #[[LOG1:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical", sizeInBits = 8, encoding = DW_ATE_boolean>
 ! CHECK-DAG: #[[REAL4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 32, encoding = DW_ATE_float>
 ! CHECK-DAG: #[[LOG4:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "logical", sizeInBits = 32, encoding = DW_ATE_boolean>
+! CHECK: #[[TY0:.*]] = #llvm.di_subroutine_type<callingConvention = DW_CC_program>
 ! CHECK: #[[TY1:.*]] = #llvm.di_subroutine_type<callingConvention = DW_CC_normal, types = #[[INT8]], #[[INT4]], #[[REAL8]], #[[LOG1]]>
 ! CHECK: #[[TY2:.*]] = #llvm.di_subroutine_type<callingConvention = DW_CC_normal, types = #[[INT4]], #[[INT8]], #[[REAL4]], #[[LOG4]]>
 
+! CHECK: #di_subprogram1 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "_QQmain", linkageName = "_QQmain", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY0]]>
 program mn
   integer(kind=4) :: i4
   integer(kind=8) :: i8
@@ -20,7 +22,7 @@ program mn
   i8 = fn1(i4, r8, l1)
   i4 = fn2(i8, r4, l4)
 contains
-  ! CHECK: #di_subprogram1 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "fn1", linkageName = "_QFPfn1", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY1]]>
+  ! CHECK: #di_subprogram2 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "fn1", linkageName = "_QFPfn1", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY1]]>
   function fn1(a, b, c) result (res)
     implicit none
     integer(kind=4), intent(in) :: a
@@ -30,7 +32,7 @@ function fn1(a, b, c) result (res)
     res = a + b
   end function
 
-! CHECK: #di_subprogram2 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "fn2", linkageName = "_QFPfn2", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY2]]>
+! CHECK: #di_subprogram3 = #llvm.di_subprogram<id = {{.*}}, compileUnit = {{.*}}, scope = {{.*}}, name = "fn2", linkageName = "_QFPfn2", file = {{.*}}, line = [[@LINE+1]], scopeLine = [[@LINE+1]], subprogramFlags = Definition, type = #[[TY2]]>
   function fn2(a, b, c) result (res)
     implicit none
     integer(kind=8), intent(in) :: a

From 10aab63c9cb49d3ddfbe2cf8992de433efeef6f1 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 30 Apr 2024 17:05:38 +0800
Subject: [PATCH 52/65] [NFC] [tests] Don't try to remove and create the same
 directory

In the test of
clang/test/Modules/no-transitive-source-location-change.cppm, there were
reports about invalid directory names in windows. The reason may be that
we may remove and create the same directory. This patch tries to avoid
such patterns for that.
---
 clang/test/Modules/no-transitive-source-location-change.cppm | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/clang/test/Modules/no-transitive-source-location-change.cppm b/clang/test/Modules/no-transitive-source-location-change.cppm
index 83cf6fb4f684d0..303142a1af890b 100644
--- a/clang/test/Modules/no-transitive-source-location-change.cppm
+++ b/clang/test/Modules/no-transitive-source-location-change.cppm
@@ -3,7 +3,6 @@
 //
 // RUN: rm -rf %t
 // RUN: split-file %s %t
-// RUN: cd %t
 //
 // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
 // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o %t/A.v1.pcm
@@ -25,10 +24,6 @@
 // RUN:     -o %t/C.v1.pcm
 // RUN: not diff %t/C.v1.pcm %t/C.pcm  &> /dev/null
 //
-// RUN: rm -rf %t
-// RUN: split-file %s %t
-// RUN: cd %t
-//
 // Test again with reduced BMI.
 // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm
 // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o %t/A.v1.pcm

From f73e87f53f5d8a86c29251dedc9dbd264179203a Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Tue, 30 Apr 2024 11:34:34 +0200
Subject: [PATCH 53/65] [Clang][Sema] Do not accept "vector _Complex" for
 AltiVec/ZVector (#90467)

The AltiVec (POWER) and ZVector (IBM Z) language extensions do not
support using the "vector" keyword when the element type is a complex
type, but current code does not verify this.

Add a Sema check and diagnostic for this case.

Fixes: https://github.com/llvm/llvm-project/issues/88399
---
 clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 ++
 clang/lib/Sema/DeclSpec.cpp                      | 5 ++++-
 clang/test/Parser/altivec.c                      | 6 ++++++
 clang/test/Parser/cxx-altivec.cpp                | 6 ++++++
 clang/test/Sema/zvector.c                        | 8 ++++++++
 clang/test/Sema/zvector2.c                       | 4 ++++
 6 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f72d5c252b863e..5cb3c808957b30 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -307,6 +307,8 @@ def err_invalid_vector_long_long_decl_spec : Error <
   "POWER7 or later) to be enabled">;
 def err_invalid_vector_long_double_decl_spec : Error<
   "cannot use 'long double' with '__vector'">;
+def err_invalid_vector_complex_decl_spec : Error<
+  "cannot use '_Complex' with '__vector'">;
 def warn_vector_long_decl_spec_combination : Warning<
   "Use of 'long' with '__vector' is deprecated">, InGroup<Deprecated>;
 
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index 5f63c857c43067..60e81890257002 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -1202,7 +1202,10 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
         !S.Context.getTargetInfo().hasFeature("power8-vector"))
       S.Diag(TSTLoc, diag::err_invalid_vector_int128_decl_spec);
 
-    if (TypeAltiVecBool) {
+    // Complex vector types are not supported.
+    if (TypeSpecComplex != TSC_unspecified)
+      S.Diag(TSCLoc, diag::err_invalid_vector_complex_decl_spec);
+    else if (TypeAltiVecBool) {
       // Sign specifiers are not allowed with vector bool. (PIM 2.1)
       if (getTypeSpecSign() != TypeSpecifierSign::Unspecified) {
         S.Diag(TSSLoc, diag::err_invalid_vector_bool_decl_spec)
diff --git a/clang/test/Parser/altivec.c b/clang/test/Parser/altivec.c
index daee5eae4d8430..445369f0dc0660 100644
--- a/clang/test/Parser/altivec.c
+++ b/clang/test/Parser/altivec.c
@@ -110,6 +110,12 @@ vector __bool long long v_bll4;      // expected-error {{use of 'long long' with
 #endif
 __vector long double  vv_ld3;        // expected-error {{cannot use 'long double' with '__vector'}}
 vector long double  v_ld4;           // expected-error {{cannot use 'long double' with '__vector'}}
+vector float _Complex v_cf;          // expected-error {{cannot use '_Complex' with '__vector'}}
+vector double _Complex v_cd;         // expected-error {{cannot use '_Complex' with '__vector'}}
+vector long double _Complex v_cld;   // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector float _Complex v_cf2;       // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector double _Complex v_cd2;      // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector long double _Complex v_cld2;// expected-error {{cannot use '_Complex' with '__vector'}}
 vector bool float v_bf;              // expected-error {{cannot use 'float' with '__vector bool'}}
 vector bool double v_bd;             // expected-error {{cannot use 'double' with '__vector bool'}}
 vector bool pixel v_bp;              // expected-error {{cannot use '__pixel' with '__vector bool'}}
diff --git a/clang/test/Parser/cxx-altivec.cpp b/clang/test/Parser/cxx-altivec.cpp
index 6da36663422b49..5cb760dababbb6 100644
--- a/clang/test/Parser/cxx-altivec.cpp
+++ b/clang/test/Parser/cxx-altivec.cpp
@@ -111,6 +111,12 @@ vector __bool long long v_bll4;      // expected-error {{use of 'long long' with
 #endif
 __vector long double  vv_ld3;        // expected-error {{cannot use 'long double' with '__vector'}}
 vector long double  v_ld4;           // expected-error {{cannot use 'long double' with '__vector'}}
+vector float _Complex v_cf;          // expected-error {{cannot use '_Complex' with '__vector'}}
+vector double _Complex v_cd;         // expected-error {{cannot use '_Complex' with '__vector'}}
+vector long double _Complex v_cld;   // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector float _Complex v_cf2;       // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector double _Complex v_cd2;      // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector long double _Complex v_cld2;// expected-error {{cannot use '_Complex' with '__vector'}}
 // FIXME: why is this diagnostic different from the others?
 vector bool v_b;                     // expected-error {{a type specifier is required for all declarations}}
 vector bool float v_bf;              // expected-error {{cannot use 'float' with '__vector bool'}}
diff --git a/clang/test/Sema/zvector.c b/clang/test/Sema/zvector.c
index 798b12bd50b767..900c39adc2a371 100644
--- a/clang/test/Sema/zvector.c
+++ b/clang/test/Sema/zvector.c
@@ -22,6 +22,10 @@ vector double fd, fd2;
 
 vector long ll; // expected-error {{cannot use 'long' with '__vector'}}
 vector float ff; // expected-error {{cannot use 'float' with '__vector'}}
+vector long double ld; // expected-error {{cannot use 'long double' with '__vector'}}
+vector float _Complex cf; // expected-error {{cannot use '_Complex' with '__vector'}}
+vector double _Complex cd; // expected-error {{cannot use '_Complex' with '__vector'}}
+vector long double _Complex cld; // expected-error {{cannot use '_Complex' with '__vector'}}
 
 signed char sc_scalar;
 unsigned char uc_scalar;
@@ -53,6 +57,10 @@ __vector bool long long bl3;
 __vector double fd3;
 __vector long ll3; // expected-error {{cannot use 'long' with '__vector'}}
 __vector float ff3; // expected-error {{cannot use 'float' with '__vector'}}
+__vector long double ld3; // expected-error {{cannot use 'long double' with '__vector'}}
+__vector float _Complex cf3; // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector double _Complex cd3; // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector long double _Complex cld3; // expected-error {{cannot use '_Complex' with '__vector'}}
 
 // Likewise for __bool
 vector __bool char bc4;
diff --git a/clang/test/Sema/zvector2.c b/clang/test/Sema/zvector2.c
index a4e0a9e2c3f18f..8a440374f335f5 100644
--- a/clang/test/Sema/zvector2.c
+++ b/clang/test/Sema/zvector2.c
@@ -25,6 +25,10 @@ vector float ff, ff2;
 // Verify that __vector is also recognized
 __vector float ff3;
 
+// With z14 we support vector float, but still no vector _Complex float.
+vector float _Complex cf; // expected-error {{cannot use '_Complex' with '__vector'}}
+__vector float _Complex cf3; // expected-error {{cannot use '_Complex' with '__vector'}}
+
 // Verify operation of vec_step
 int res_ff[vec_step(ff) == 4 ? 1 : -1];
 

From 62dea99a7d7df9daedbb86133f3d46699cd2728d Mon Sep 17 00:00:00 2001
From: David Stuttard <david.stuttard@amd.com>
Date: Tue, 30 Apr 2024 10:41:51 +0100
Subject: [PATCH 54/65] [AMDGPU] Fix gfx12 waitcnt type for image_msaa_load
 (#90201)

image_msaa_load is actually encoded as a VSAMPLE instruction and
requires the appropriate waitcnt variant.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |  8 ++++--
 .../AMDGPU/llvm.amdgcn.image.msaa.load.ll     | 26 +++++++++----------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 91a1c40dd8247d..15a1db51c6d78b 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -187,8 +187,12 @@ VmemType getVmemType(const MachineInstr &Inst) {
   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
   const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
-  return BaseInfo->BVH ? VMEM_BVH
-                       : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER;
+  // The test for MSAA here is because gfx12+ image_msaa_load is actually
+  // encoded as VSAMPLE and requires the appropriate s_waitcnt variant for that.
+  // Pre-gfx12 doesn't care since all vmem types result in the same s_waitcnt.
+  return BaseInfo->BVH                         ? VMEM_BVH
+         : BaseInfo->Sampler || BaseInfo->MSAA ? VMEM_SAMPLER
+                                               : VMEM_NOSAMPLER;
 }
 
 unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
index 7b1f55e7eebac0..b75723f544d1dd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
@@ -12,7 +12,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
 ; GFX12-LABEL: load_2dmsaa:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -50,7 +50,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrsp
 ; GFX12-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 ; encoding: [0x0a,0x01,0x10,0xca,0x0b,0x01,0x02,0x02]
 ; GFX12-NEXT:    v_mov_b32_e32 v4, v12 ; encoding: [0x0c,0x03,0x08,0x7e]
 ; GFX12-NEXT:    image_msaa_load v[0:4], [v7, v6, v5], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x0e,0x20,0x86,0xe4,0x00,0x01,0x00,0x00,0x07,0x06,0x05,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    global_store_b32 v8, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x08,0x00,0x00,0x00]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
@@ -71,7 +71,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3
 ; GFX12-LABEL: load_2darraymsaa:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2, v3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x07,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 4, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -110,7 +110,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr ad
 ; GFX12-NEXT:    v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 ; encoding: [0x0b,0x01,0x10,0xca,0x0c,0x01,0x02,0x02]
 ; GFX12-NEXT:    v_mov_b32_e32 v4, v13 ; encoding: [0x0d,0x03,0x08,0x7e]
 ; GFX12-NEXT:    image_msaa_load v[0:4], [v8, v7, v6, v5], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x0f,0x20,0x06,0xe6,0x00,0x00,0x00,0x00,0x08,0x07,0x06,0x05]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    global_store_b32 v9, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x09,0x00,0x00,0x00]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
@@ -131,7 +131,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32
 ; GFX12-LABEL: load_2dmsaa_glc:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_NT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x10,0x00,0x00,0x01,0x02,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 1)
@@ -148,7 +148,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32
 ; GFX12-LABEL: load_2dmsaa_slc:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_HT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x20,0x00,0x00,0x01,0x02,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 2)
@@ -165,7 +165,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s,
 ; GFX12-LABEL: load_2dmsaa_glc_slc:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_LU ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x30,0x00,0x00,0x01,0x02,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 3)
@@ -182,7 +182,7 @@ define amdgpu_ps <4 x half> @load_2dmsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32
 ; GFX12-LABEL: load_2dmsaa_d16:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x26,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -215,7 +215,7 @@ define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addr
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
 ; GFX12-NEXT:    v_mov_b32_e32 v2, v8 ; encoding: [0x08,0x03,0x04,0x7e]
 ; GFX12-NEXT:    image_msaa_load v[0:2], [v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x2e,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x05,0x04,0x03,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    global_store_b32 v6, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x06,0x00,0x00,0x00]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
@@ -236,7 +236,7 @@ define amdgpu_ps <4 x half> @load_2darraymsaa_d16(<8 x i32> inreg %rsrc, i32 %s,
 ; GFX12-LABEL: load_2darraymsaa_d16:
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    image_msaa_load v[0:1], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x27,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -269,7 +269,7 @@ define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
 ; GFX12-NEXT:    v_mov_b32_e32 v2, v9 ; encoding: [0x09,0x03,0x04,0x7e]
 ; GFX12-NEXT:    image_msaa_load v[0:2], [v6, v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x2f,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x06,0x05,0x04,0x03]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    global_store_b32 v7, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x07,0x00,0x00,0x00]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
@@ -292,7 +292,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16
 ; GFX12:       ; %bb.0: ; %main_body
 ; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32 1, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -313,7 +313,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_a16(<8 x i32> inreg %rsrc, i16 %s
 ; GFX12-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]
 ; GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
 ; GFX12-NEXT:    image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x47,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
-; GFX12-NEXT:    s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
+; GFX12-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32 4, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)

From fb2d3056618e3d03ba9a695627c7b002458e59f0 Mon Sep 17 00:00:00 2001
From: Danial Klimkin <dklimkin@google.com>
Date: Tue, 30 Apr 2024 11:42:13 +0200
Subject: [PATCH 55/65] Fix output in coro-elide-thinlto.cpp (#90579)

Current dir can be read-only. Use a temp path instead.
---
 clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp b/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp
index 790899486ec9d1..5c5fed2f764622 100644
--- a/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp
+++ b/clang/test/CodeGenCoroutines/coro-elide-thinlto.cpp
@@ -4,8 +4,8 @@
 // RUN: split-file %s %t
 // RUN: %clang --target=x86_64-linux -std=c++20 -O2 -flto=thin -I %S -c %t/coro-elide-callee.cpp -o %t/coro-elide-callee.o
 // RUN: %clang --target=x86_64-linux -std=c++20 -O2 -flto=thin -I %S -c %t/coro-elide-caller.cpp -o %t/coro-elide-caller.o
-// RUN: llvm-lto -thinlto %t/coro-elide-callee.o %t/coro-elide-caller.o -o summary
-// RUN: %clang_cc1 -O2 -x ir %t/coro-elide-caller.o -fthinlto-index=summary.thinlto.bc -emit-llvm -o - | FileCheck %s
+// RUN: llvm-lto -thinlto %t/coro-elide-callee.o %t/coro-elide-caller.o -o %t/summary
+// RUN: %clang_cc1 -O2 -x ir %t/coro-elide-caller.o -fthinlto-index=%t/summary.thinlto.bc -emit-llvm -o - | FileCheck %s
 
 //--- coro-elide-task.h
 #pragma once

From f10685f3e606e9e50906d9bf4e302a4281664152 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 29 Apr 2024 17:55:33 +0100
Subject: [PATCH 56/65] [X86] lowerAtomicArith - use DAG::getNegative() helper.
 NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a811ce43422ec8..0a58ee4a651a96 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31534,11 +31534,10 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
     // Handle (atomic_load_xor p, SignBit) as (atomic_load_add p, SignBit) so we
     // can use LXADD as opposed to cmpxchg.
     if (Opc == ISD::ATOMIC_LOAD_SUB ||
-        (Opc == ISD::ATOMIC_LOAD_XOR && isMinSignedConstant(RHS))) {
-      RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
-      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
-                           AN->getMemOperand());
-    }
+        (Opc == ISD::ATOMIC_LOAD_XOR && isMinSignedConstant(RHS)))
+      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
+                           DAG.getNegative(RHS, DL, VT), AN->getMemOperand());
+
     assert(Opc == ISD::ATOMIC_LOAD_ADD &&
            "Used AtomicRMW ops other than Add should have been expanded!");
     return N;

From 0061616770c15b334c4ad1703a1a2502b6cd4485 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 29 Apr 2024 18:00:28 +0100
Subject: [PATCH 57/65] [X86] combinePredicateReduction - use
 DAG::getNegative() helper. NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0a58ee4a651a96..e9b4b08532c969 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43905,8 +43905,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
   EVT SetccVT = TLI.getSetCCResultType(DAG.getDataLayout(), Ctx, CmpVT);
   SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
   SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
-  SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
-  return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
+  return DAG.getNegative(Zext, DL, ExtractVT);
 }
 
 static SDValue combineVPDPBUSDPattern(SDNode *Extract, SelectionDAG &DAG,

From 3fca9d71447b7d3536e912d73ffd56a351e10bce Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 29 Apr 2024 18:15:45 +0100
Subject: [PATCH 58/65] [X86] combineMul - use DAG::getNegative() helper. NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e9b4b08532c969..f99db7a9c9b175 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47247,8 +47247,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
       NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                            DAG.getConstant(AbsMulAmt, DL, VT));
       if (SignMulAmt < 0)
-        NewMul =
-            DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+        NewMul = DAG.getNegative(NewMul, DL, VT);
 
       return NewMul;
     }
@@ -47295,8 +47294,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
 
       // Negate the result.
       if (SignMulAmt < 0)
-        NewMul =
-            DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+        NewMul = DAG.getNegative(NewMul, DL, VT);
     } else if (!Subtarget.slowLEA())
       NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
   }
@@ -47312,10 +47310,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
           ISD::ADD, DL, VT, N->getOperand(0),
           DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                       DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, ShiftVT)));
-      // To negate, subtract the number from zero
       if (SignMulAmt < 0)
-        NewMul =
-            DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+        NewMul = DAG.getNegative(NewMul, DL, VT);
     } else if (isPowerOf2_64(AbsMulAmt + 1)) {
       // (mul x, 2^N - 1) => (sub (shl x, N), x)
       NewMul =

From 066dc1e05b8c5feca1f3957a10edc492705a5db2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 30 Apr 2024 10:36:46 +0100
Subject: [PATCH 59/65] [X86]
 combineMulToPMADDWD/combineMulToPMULDQ/reduceVMULWidth - pull out repeated
 SDLoc(). NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 40 ++++++++++++-------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f99db7a9c9b175..158bf7df07ec3a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46875,7 +46875,7 @@ static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
 /// If %2 == zext32(trunc16(%2)), i.e., the scalar value range of %2 is
 /// 0 to 65535, and the scalar value range of %4 is also 0 to 65535,
 /// generate pmullw+pmulhuw for it (MULU16 mode).
-static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
+static SDValue reduceVMULWidth(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   // Check for legality
   // pmullw/pmulhw are not supported by SSE.
@@ -46894,7 +46894,6 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   if (!canReduceVMulWidth(N, DAG, Mode))
     return SDValue();
 
-  SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getOperand(0).getValueType();
@@ -47034,7 +47033,8 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
 // If the upper 17 bits of either element are zero and the other element are
 // zero/sign bits then we can use PMADDWD, which is always at least as quick as
 // PMULLD, except on KNL.
-static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
+static SDValue combineMulToPMADDWD(SDNode *N, const SDLoc &DL,
+                                   SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
   if (!Subtarget.hasSSE2())
     return SDValue();
@@ -47096,19 +47096,18 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
       return Op;
     // Mask off upper 16-bits of sign-extended constants.
     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
-      return DAG.getNode(ISD::AND, SDLoc(N), VT, Op,
-                         DAG.getConstant(0xFFFF, SDLoc(N), VT));
+      return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(0xFFFF, DL, VT));
     if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) {
       SDValue Src = Op.getOperand(0);
       // Convert sext(vXi16) to zext(vXi16).
       if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)
-        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
+        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src);
       // Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets
       // which will expand the extension.
       if (Src.getScalarValueSizeInBits() < 16 && !Subtarget.hasSSE41()) {
         EVT ExtVT = VT.changeVectorElementType(MVT::i16);
-        Src = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), ExtVT, Src);
-        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
+        Src = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, Src);
+        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src);
       }
     }
     // Convert SIGN_EXTEND_VECTOR_INREG to ZEXT_EXTEND_VECTOR_INREG.
@@ -47116,12 +47115,12 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
         N->isOnlyUserOf(Op.getNode())) {
       SDValue Src = Op.getOperand(0);
       if (Src.getScalarValueSizeInBits() == 16)
-        return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT, Src);
+        return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Src);
     }
     // Convert VSRAI(Op, 16) to VSRLI(Op, 16).
     if (Op.getOpcode() == X86ISD::VSRAI && Op.getConstantOperandVal(1) == 16 &&
         N->isOnlyUserOf(Op.getNode())) {
-      return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, Op.getOperand(0),
+      return DAG.getNode(X86ISD::VSRLI, DL, VT, Op.getOperand(0),
                          Op.getOperand(1));
     }
     return SDValue();
@@ -47142,11 +47141,10 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
                        DAG.getBitcast(OpVT, Ops[0]),
                        DAG.getBitcast(OpVT, Ops[1]));
   };
-  return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {N0, N1},
-                          PMADDWDBuilder);
+  return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMADDWDBuilder);
 }
 
-static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
+static SDValue combineMulToPMULDQ(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
   if (!Subtarget.hasSSE2())
     return SDValue();
@@ -47170,8 +47168,8 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
                             ArrayRef<SDValue> Ops) {
       return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops);
     };
-    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 },
-                            PMULDQBuilder, /*CheckBWI*/false);
+    return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMULDQBuilder,
+                            /*CheckBWI*/ false);
   }
 
   // If the upper bits are zero we can use a single pmuludq.
@@ -47181,8 +47179,8 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
                              ArrayRef<SDValue> Ops) {
       return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops);
     };
-    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 },
-                            PMULUDQBuilder, /*CheckBWI*/false);
+    return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMULUDQBuilder,
+                            /*CheckBWI*/ false);
   }
 
   return SDValue();
@@ -47192,15 +47190,16 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
 
-  if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget))
+  if (SDValue V = combineMulToPMADDWD(N, DL, DAG, Subtarget))
     return V;
 
-  if (SDValue V = combineMulToPMULDQ(N, DAG, Subtarget))
+  if (SDValue V = combineMulToPMULDQ(N, DL, DAG, Subtarget))
     return V;
 
   if (DCI.isBeforeLegalize() && VT.isVector())
-    return reduceVMULWidth(N, DAG, Subtarget);
+    return reduceVMULWidth(N, DL, DAG, Subtarget);
 
   // Optimize a single multiply with constant into two operations in order to
   // implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
@@ -47240,7 +47239,6 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
   uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
 
-  SDLoc DL(N);
   SDValue NewMul = SDValue();
   if (VT == MVT::i64 || VT == MVT::i32) {
     if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {

From 2cb97c7e29029dc552c66b3ddf6030d826538d47 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 30 Apr 2024 10:43:02 +0100
Subject: [PATCH 60/65] [X86] Add TODO for getTargetConstantFromBasePtr to
 support non-zero offsets.

As noted on #66991 - we sometimes share vector constant pool entries, referencing subvectors within them via pointer offsets
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 158bf7df07ec3a..859b2682d106c5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4639,6 +4639,7 @@ static ConstantPoolSDNode *getTargetConstantPoolFromBasePtr(SDValue Ptr) {
   return dyn_cast<ConstantPoolSDNode>(Ptr);
 }
 
+// TODO: Add support for non-zero offsets.
 static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) {
   ConstantPoolSDNode *CNode = getTargetConstantPoolFromBasePtr(Ptr);
   if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)

From 34c89eff64cce7debb8e2d5a0d1c4d896a23e432 Mon Sep 17 00:00:00 2001
From: Monad <yanwqmonad@gmail.com>
Date: Tue, 30 Apr 2024 18:07:40 +0800
Subject: [PATCH 61/65] [InstCombine] Fold `trunc nuw/nsw (x xor y) to i1` to
 `x != y` (#90408)

Fold:
``` llvm
define i1 @src(i8 %x, i8 %y) {
  %xor = xor i8 %x, %y
  %r = trunc nuw/nsw i8 %xor to i1
  ret i1 %r
}

define i1 @tgt(i8 %x, i8 %y) {
  %r = icmp ne i8 %x, %y
  ret i1 %r
}
```

Proof: https://alive2.llvm.org/ce/z/dcuHmn
---
 .../InstCombine/InstCombineCasts.cpp          |  6 +++
 llvm/test/Transforms/InstCombine/trunc.ll     | 41 +++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 567b27b4630439..11e31877de38c2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -770,6 +770,12 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
         return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
       }
     }
+
+    if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
+      Value *X, *Y;
+      if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
+        return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
+    }
   }
 
   Value *A, *B;
diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll
index e59b2bea6684c0..a180c3d52f6ab1 100644
--- a/llvm/test/Transforms/InstCombine/trunc.ll
+++ b/llvm/test/Transforms/InstCombine/trunc.ll
@@ -1054,3 +1054,44 @@ define i8 @drop_both_trunc(i16 %x, i16 %y) {
   %res = trunc nuw nsw i16 %and2 to i8
   ret i8 %res
 }
+
+define i1 @trunc_xor(i8 %x, i8 %y) {
+; CHECK-LABEL: @trunc_xor(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = trunc i8 [[XOR]] to i1
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %xor = xor i8 %x, %y
+  %r = trunc i8 %xor to i1
+  ret i1 %r
+}
+
+define i1 @trunc_nuw_xor(i8 %x, i8 %y) {
+; CHECK-LABEL: @trunc_nuw_xor(
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %xor = xor i8 %x, %y
+  %r = trunc nuw i8 %xor to i1
+  ret i1 %r
+}
+
+define i1 @trunc_nsw_xor(i8 %x, i8 %y) {
+; CHECK-LABEL: @trunc_nsw_xor(
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %xor = xor i8 %x, %y
+  %r = trunc nsw i8 %xor to i1
+  ret i1 %r
+}
+
+define <2 x i1> @trunc_nuw_xor_vector(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @trunc_nuw_xor_vector(
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %xor = xor <2 x i8> %x, %y
+  %r = trunc nuw <2 x i8> %xor to <2 x i1>
+  ret <2 x i1> %r
+}

From 66e1d2c96a194f572be5b373705f493b1a4dc811 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Tue, 30 Apr 2024 11:12:58 +0100
Subject: [PATCH 62/65] [NFC][LLVM][SVE] Simplify isel for BSL and NBSL.
 (#90233)

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td    |  2 --
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 14 ++++++++++++--
 llvm/lib/Target/AArch64/SVEInstrFormats.td     | 16 ++--------------
 3 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2159116d1ab7c4..17d96370c04a59 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -746,8 +746,6 @@ def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
 def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
 
 def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
-def AArch64nbsl: PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
-                         (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>;
 
 def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
 def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b90ac0ff1fe00a..62e68de1359f72 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -508,6 +508,16 @@ def AArch64smulh : PatFrag<(ops node:$op1, node:$op2),
 def AArch64umulh : PatFrag<(ops node:$op1, node:$op2),
                            (AArch64umulh_p (SVEAnyPredicate), node:$op1, node:$op2)>;
 
+
+def AArch64bsl  : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
+                           [(int_aarch64_sve_bsl node:$Op1, node:$Op2, node:$Op3),
+                            (AArch64bsp node:$Op3, node:$Op1, node:$Op2)]>;
+
+def AArch64nbsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
+                           [(int_aarch64_sve_nbsl node:$Op1, node:$Op2, node:$Op3),
+                            (vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2))]>;
+
+
 let Predicates = [HasSVE] in {
   def RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
   def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -3757,10 +3767,10 @@ let Predicates = [HasSVE2orSME] in {
   // SVE2 bitwise ternary operations
   defm EOR3_ZZZZ  : sve2_int_bitwise_ternary_op<0b000, "eor3",  AArch64eor3>;
   defm BCAX_ZZZZ  : sve2_int_bitwise_ternary_op<0b010, "bcax",  AArch64bcax>;
-  defm BSL_ZZZZ   : sve2_int_bitwise_ternary_op<0b001, "bsl",   int_aarch64_sve_bsl, AArch64bsp>;
+  defm BSL_ZZZZ   : sve2_int_bitwise_ternary_op<0b001, "bsl",   AArch64bsl>;
   defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
   defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
-  defm NBSL_ZZZZ  : sve2_int_bitwise_ternary_op<0b111, "nbsl",  int_aarch64_sve_nbsl, AArch64nbsl>;
+  defm NBSL_ZZZZ  : sve2_int_bitwise_ternary_op<0b111, "nbsl",  AArch64nbsl>;
 
   // SVE2 bitwise xor and rotate right by immediate
   defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 485b38a29a0f78..69c3238c7d614e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -605,13 +605,6 @@ class SVE_2_Op_Fp_Imm_Pat_Zero<ValueType vt, SDPatternOperator op,
                       (vt (splat_vector (it immL))))),
       (inst $Pg, $Zs1, imm)>;
 
-// Used to re-order the operands of BSP when lowering to BSL. BSP has the order:
-// mask, in1, in2 whereas BSL for SVE2 has them ordered in1, in2, mask
-class SVE_3_Op_BSP_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
-                   ValueType vt2, ValueType vt3, Instruction inst>
-: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
-      (inst $Op2, $Op3, $Op1)>;
-
 class SVE_Shift_Add_All_Active_Pat<ValueType vtd, SDPatternOperator op, ValueType pt,
                                    ValueType vt1, ValueType vt2, ValueType vt3,
                                    Instruction inst>
@@ -4922,8 +4915,8 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
   let hasSideEffects = 0;
 }
 
-multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op,
-                                       SDPatternOperator ir_op = null_frag> {
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm,
+                                       SDPatternOperator op> {
   def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
 
   def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
@@ -4937,11 +4930,6 @@ multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperato
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
-
-  def : SVE_3_Op_BSP_Pat<nxv16i8, ir_op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_BSP_Pat<nxv8i16, ir_op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_BSP_Pat<nxv4i32, ir_op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_BSP_Pat<nxv2i64, ir_op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
 }
 
 class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,

From 7faf34307ea974886a10d8d06352a67cdeaf1c23 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Tue, 30 Apr 2024 18:36:13 +0800
Subject: [PATCH 63/65] [RISCV] Remove -riscv-insert-vsetvl-strict-asserts flag
 (#90171)

This flag has been enabled by default for almost two years now since
1f06398e96d4508d22f42b760f70eb5d4e7b1dc9, and at this stage we probably
shouldn't be falling back to the fixups.

This removes the flag so we always perform the assertion, as well as
making sure that CurInfo is always valid on exit: We shouldn't leave
emitVSETVLIs with an uninitialized VSETVLIInfo.
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 36 ++++----------------
 1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index b27e1dd258ebd0..216dc78085204e 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -45,10 +45,6 @@ static cl::opt<bool> DisableInsertVSETVLPHIOpt(
     "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
     cl::desc("Disable looking through phis when inserting vsetvlis."));
 
-static cl::opt<bool> UseStrictAsserts(
-    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
-    cl::desc("Enable strict assertion checking for the dataflow algorithm"));
-
 namespace {
 
 static unsigned getVLOpNum(const MachineInstr &MI) {
@@ -1432,32 +1428,14 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
     transferAfter(CurInfo, MI);
   }
 
-  // If we reach the end of the block and our current info doesn't match the
-  // expected info, insert a vsetvli to correct.
-  if (!UseStrictAsserts) {
-    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
-    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
-        CurInfo != ExitInfo) {
-      // Note there's an implicit assumption here that terminators never use
-      // or modify VL or VTYPE.  Also, fallthrough will return end().
-      auto InsertPt = MBB.getFirstInstrTerminator();
-      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
-                    CurInfo);
-      CurInfo = ExitInfo;
-    }
-  }
-
-  if (UseStrictAsserts && CurInfo.isValid()) {
-    const auto &Info = BlockInfo[MBB.getNumber()];
-    if (CurInfo != Info.Exit) {
-      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
-      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
-      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
-      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
-    }
-    assert(CurInfo == Info.Exit &&
-           "InsertVSETVLI dataflow invariant violated");
+  const auto &Info = BlockInfo[MBB.getNumber()];
+  if (CurInfo != Info.Exit) {
+    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
+    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
+    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
+    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
   }
+  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
 }
 
 /// Perform simple partial redundancy elimination of the VSETVLI instructions

From 2f9462e9e4f2b2b493673c39d4ad665175eb0b59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 30 Apr 2024 08:30:22 +0200
Subject: [PATCH 64/65] [clang][Interp] Fix initializing vectors from a list of
 other vectors

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 16 +++++++++++++---
 clang/lib/AST/Interp/Interp.h            | 18 ++++++++++++++++++
 clang/lib/AST/Interp/Opcodes.td          |  6 +++++-
 clang/test/AST/Interp/opencl.cl          |  7 +++++--
 4 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 17f95e7f3cac0d..b07c57ab86d9ad 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1186,11 +1186,21 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
       if (!this->visit(Init))
         return false;
 
-      if (!this->emitInitElem(ElemT, InitIndex, E))
-        return false;
-      ++InitIndex;
+      // If the initializer is of vector type itself, we have to deconstruct
+      // that and initialize all the target fields from the initializer fields.
+      if (const auto *InitVecT = Init->getType()->getAs<VectorType>()) {
+        if (!this->emitCopyArray(ElemT, 0, InitIndex, InitVecT->getNumElements(), E))
+          return false;
+        InitIndex += InitVecT->getNumElements();
+      } else {
+        if (!this->emitInitElem(ElemT, InitIndex, E))
+          return false;
+        ++InitIndex;
+      }
     }
 
+    assert(InitIndex <= NumVecElements);
+
     // Fill the rest with zeroes.
     for (; InitIndex != NumVecElements; ++InitIndex) {
       if (!this->visitZeroInitializer(ElemT, ElemQT, E))
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index 2b650d684be9c4..66d30cc3fbaaba 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -2090,6 +2090,24 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) {
   return true;
 }
 
+template <PrimType Name, class T = typename PrimConv<Name>::T>
+inline bool CopyArray(InterpState &S, CodePtr OpPC, uint32_t SrcIndex, uint32_t DestIndex, uint32_t Size) {
+  const auto &SrcPtr = S.Stk.pop<Pointer>();
+  const auto &DestPtr = S.Stk.peek<Pointer>();
+
+  for (uint32_t I = 0; I != Size; ++I) {
+    const Pointer &SP = SrcPtr.atIndex(SrcIndex + I);
+
+    if (!CheckLoad(S, OpPC, SP))
+      return false;
+
+    const Pointer &DP = DestPtr.atIndex(DestIndex + I);
+    DP.deref<T>() = SP.deref<T>();
+    DP.initialize();
+  }
+  return true;
+}
+
 /// Just takes a pointer and checks if it's an incomplete
 /// array type.
 inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td
index 2a97b978b52325..cfbd7f93c32de8 100644
--- a/clang/lib/AST/Interp/Opcodes.td
+++ b/clang/lib/AST/Interp/Opcodes.td
@@ -376,7 +376,11 @@ def ArrayElem : Opcode {
   let HasGroup = 1;
 }
 
-
+def CopyArray : Opcode {
+  let Args = [ArgUint32, ArgUint32, ArgUint32];
+  let Types = [AllTypeClass];
+  let HasGroup = 1;
+}
 
 //===----------------------------------------------------------------------===//
 // Direct field accessors
diff --git a/clang/test/AST/Interp/opencl.cl b/clang/test/AST/Interp/opencl.cl
index e7b9ec5caf2b1e..fd7756fff7c11e 100644
--- a/clang/test/AST/Interp/opencl.cl
+++ b/clang/test/AST/Interp/opencl.cl
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -fsyntax-only -verify=ref,both %s
-// RUN: %clang_cc1 -fsyntax-only -verify=expected,both %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -fsyntax-only -cl-std=CL2.0 -verify=ref,both %s
+// RUN: %clang_cc1 -fsyntax-only -cl-std=CL2.0 -verify=expected,both %s -fexperimental-new-constant-interpreter
 
 // both-no-diagnostics
 
@@ -33,3 +33,6 @@ void foo(int3 arg1, int8 arg2) {
 void negativeShift32(int a,int b) {
   char array0[((int)1)<<40];
 }
+
+int2 A = {1,2};
+int4 B = {(int2)(1,2), (int2)(3,4)};

From 3751ac41e794ffdae46e8efca1954b32d7cef66e Mon Sep 17 00:00:00 2001
From: Thorsten Blass <Thorsten.Blass@amd.com>
Date: Thu, 25 Apr 2024 12:02:54 -0500
Subject: [PATCH 65/65] [NFC][OpenMP][MI300] Refactoring of the checkIfAPU()
 method in preparation of an upstream patch.

This patch refactors the checkIfAPU method. The revised checkIfAPU() method, using the HSA symbols HSA_AGENT_INFO_AMD_MEMORY_PROPERTIES and HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU, will be upstreamed.

This patch reduces merge conflicts with the upstream method, as the detection of the GFX90a and MI300x is moved to separate methods. As such, the downstream method can be replaced by the upstream implementation.

Change-Id: Id10605e7ea2248538f26ebc717341b1735495a01
---
 offload/include/device.h                   |  4 ++
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 62 +++++++++++++++-------
 2 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/offload/include/device.h b/offload/include/device.h
index 532a0f9100ad4e..3d284ebfaecb5b 100644
--- a/offload/include/device.h
+++ b/offload/include/device.h
@@ -164,6 +164,10 @@ struct DeviceTy {
   /// Ask the device whether it is an APU.
   bool checkIfAPU();
 
+  bool checkIfGFX90a();
+
+  bool checkIfMI300x();
+
   /// Ask the device whether it supports unified memory.
   bool supportsUnifiedMemory();
 
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 4b812c35ea4ba2..e4b7b92a4f91a7 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2758,6 +2758,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (auto Err = checkIfAPU())
       return Err;
 
+    // detect if device is GFX90a.
+    if (auto Err = checkIfGFX90a())
+      return Err;
+
+    // detect if device is an MI300X.
+    if (auto Err = checkIfMI300x())
+      return Err;
+
     // detect special cases for MI200 and MI300A
     specialBehaviorHandling();
 
@@ -3811,28 +3819,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   /// Detect if current architecture is an APU.
   Error checkIfAPU() {
     // TODO: replace with ROCr API once it becomes available.
-    // MI200
-    llvm::StringRef StrGfxName(ComputeUnitKind);
-    IsEquippedWithGFX90A = llvm::StringSwitch<bool>(StrGfxName)
-                               .Case("gfx90a", true)
-                               .Default(false);
-    if (IsEquippedWithGFX90A)
-      return Plugin::success();
-
     // MI300A
+    llvm::StringRef StrGfxName(ComputeUnitKind);
     IsAPU = llvm::StringSwitch<bool>(StrGfxName)
                 .Case("gfx940", true)
                 .Default(false);
     if (IsAPU)
       return Plugin::success();
 
-    // MI300X
-    IsEquippedWithMI300X = llvm::StringSwitch<bool>(StrGfxName)
-                               .Case("gfx941", true)
-                               .Default(false);
-    if (IsEquippedWithMI300X)
-      return Plugin::success();
-
     bool MayBeAPU = llvm::StringSwitch<bool>(StrGfxName)
                         .Case("gfx942", true)
                         .Default(false);
@@ -3844,11 +3838,43 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_CHIP_ID, ChipID))
       return Err;
 
-    if (!(ChipID & 0x1)) {
+    if (!(ChipID & 0x1))
       IsAPU = true;
+
+    return Plugin::success();
+  }
+
+  Error checkIfGFX90a() {
+    llvm::StringRef StrGfxName(ComputeUnitKind);
+    IsEquippedWithGFX90A = llvm::StringSwitch<bool>(StrGfxName)
+                               .Case("gfx90a", true)
+                               .Default(false);
+    return Plugin::success();
+  }
+
+  Error checkIfMI300x() {
+    llvm::StringRef StrGfxName(ComputeUnitKind);
+    IsEquippedWithMI300X = llvm::StringSwitch<bool>(StrGfxName)
+                               .Case("gfx941", true)
+                               .Default(false);
+
+    if (IsEquippedWithMI300X)
       return Plugin::success();
-    }
-    IsEquippedWithMI300X = true;
+
+    bool isMI300 = llvm::StringSwitch<bool>(StrGfxName)
+                       .Case("gfx942", true)
+                       .Default(false);
+    if (!isMI300)
+      return Plugin::success();
+
+    // Can be MI300A or MI300X
+    uint32_t ChipID = 0;
+    if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_CHIP_ID, ChipID))
+      return Err;
+
+    if (ChipID & 0x1)
+      IsEquippedWithMI300X = true;
+
     return Plugin::success();
   }