From a0651db490328a972185e44ff637970b3456406b Mon Sep 17 00:00:00 2001
From: Younan Zhang
Date: Wed, 10 Apr 2024 19:23:32 +0800
Subject: [PATCH 01/12] [clang][Sema] Avoid guessing unexpanded packs' size in
 getFullyPackExpandedSize (#87768)

There has been an optimization for `SizeOfPackExprs` since c5452ed9, in which
we overlooked a case where the template arguments were not yet formed into a
`PackExpansionType` at the token annotation stage. As a result, a template
involving such expressions could lose its dependence, causing false-positive
diagnostics.

Fixes https://github.com/llvm/llvm-project/issues/84220
---
 clang/docs/ReleaseNotes.rst                 |  1 +
 clang/lib/Sema/SemaTemplateVariadic.cpp     | 11 +++++++++++
 clang/test/SemaTemplate/alias-templates.cpp | 23 +++++++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f96cebbde3d82..6bff80ed4d210 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -526,6 +526,7 @@ Bug Fixes to C++ Support
 - Fix crash when inheriting from a cv-qualified type. Fixes:
   (`#35603 `_)
 - Fix a crash when the using enum declaration uses an anonymous enumeration. Fixes (#GH86790).
+- Handled an edge case in ``getFullyPackExpandedSize`` so that we now avoid a false-positive diagnostic. (#GH84220)
 - Clang now correctly tracks type dependence of by-value captures in lambdas with an explicit
   object parameter.
   Fixes (#GH70604), (#GH79754), (#GH84163), (#GH84425), (#GH86054), (#GH86398), and (#GH86399).
diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp
index 903fbfd18e779..4909414c0c78d 100644
--- a/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -1243,6 +1243,17 @@ std::optional Sema::getFullyPackExpandedSize(TemplateArgument Arg) {
     // expanded this pack expansion into the enclosing pack if we could.
     if (Elem.isPackExpansion())
       return std::nullopt;
+    // Don't guess the size of unexpanded packs. The pack within a template
+    // argument may not yet have been formed into a PackExpansionType, because
+    // the ellipsis has not been seen yet at the annotation stage.
+    //
+    // This doesn't invalidate the optimization: Arg can be an unexpanded
+    // pack regardless of Elem's dependence. For instance, a TemplateArgument
+    // that contains either a SubstTemplateTypeParmPackType or a
+    // SubstNonTypeTemplateParmPackExpr is always considered unexpanded, but
+    // the underlying TemplateArgument thereof may not be.
+    if (Elem.containsUnexpandedParameterPack())
+      return std::nullopt;
   }
   return Pack.pack_size();
 }
diff --git a/clang/test/SemaTemplate/alias-templates.cpp b/clang/test/SemaTemplate/alias-templates.cpp
index 8d7cc6118610a..ab5cad72faf1b 100644
--- a/clang/test/SemaTemplate/alias-templates.cpp
+++ b/clang/test/SemaTemplate/alias-templates.cpp
@@ -236,6 +236,29 @@ namespace PR14858 {
   void test_q(int (&a)[5]) { Q().f(&a); }
 }

+namespace PR84220 {
+
+template class list {};
+
+template struct foo_impl {
+  template using f = int;
+};
+
+template
+using foo = typename foo_impl::template f;
+
+// We call getFullyPackExpandedSize at the annotation stage
+// before parsing the ellipsis next to the foo. This happens before
+// a PackExpansionType is formed for foo.
+// getFullyPackExpandedSize shouldn't determine the value here. Otherwise,
+// foo_impl would lose its dependency despite the template
+// arguments being unsubstituted.
+template using test = list...>; + +test a; + +} + namespace redecl { template using A = int; template using A = int; From 8d206f51497fdf1ceebd6430b2f7d31ef735d0dc Mon Sep 17 00:00:00 2001 From: Edwin Vane Date: Wed, 10 Apr 2024 07:40:35 -0400 Subject: [PATCH 02/12] [clang-tidy] Allow renaming macro arguments (#87792) Although the identifier-naming.cpp lit test expected macro arguments not to be renamed, the code seemed to already allow it. The code was simply not being exercised because a SourceManager argument wasn't being provided. With this change, renaming of macro arguments that expand to renamable decls is permitted. --- .../utils/RenamerClangTidyCheck.cpp | 2 +- clang-tools-extra/docs/ReleaseNotes.rst | 2 +- .../readability/identifier-naming.cpp | 23 +++++++++++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 69b7d40ef628d..ad8048e2a92b7 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -489,7 +489,7 @@ void RenamerClangTidyCheck::checkNamedDecl(const NamedDecl *Decl, } Failure.Info = std::move(Info); - addUsage(Decl, Range); + addUsage(Decl, Range, &SourceMgr); } void RenamerClangTidyCheck::check(const MatchFinder::MatchResult &Result) { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index a7193e90c38da..b66be44e9f8a6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -268,7 +268,7 @@ Changes in existing checks ` check in `GetConfigPerFile` mode by resolving symbolic links to header files. Fixed handling of Hungarian Prefix when configured to `LowerCase`. Added support for renaming designated - initializers. + initializers. Added support for renaming macro arguments. 
- Improved :doc:`readability-implicit-bool-conversion ` check to provide diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp index 57ef4aae5ddb7..99149fe86acee 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp @@ -108,10 +108,12 @@ USER_NS::object g_s2; // NO warnings or fixes expected as USER_NS and object are declared in a header file SYSTEM_MACRO(var1); -// NO warnings or fixes expected as var1 is from macro expansion +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: invalid case style for global variable 'var1' [readability-identifier-naming] +// CHECK-FIXES: {{^}}SYSTEM_MACRO(g_var1); USER_MACRO(var2); -// NO warnings or fixes expected as var2 is declared in a macro expansion +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: invalid case style for global variable 'var2' [readability-identifier-naming] +// CHECK-FIXES: {{^}}USER_MACRO(g_var2); #define BLA int FOO_bar BLA; @@ -602,9 +604,20 @@ static void static_Function() { // CHECK-FIXES: {{^}}#define MY_TEST_MACRO(X) X() void MY_TEST_Macro(function) {} -// CHECK-FIXES: {{^}}void MY_TEST_MACRO(function) {} -} -} +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: invalid case style for global function 'function' [readability-identifier-naming] +// CHECK-FIXES: {{^}}void MY_TEST_MACRO(Function) {} + +#define MY_CAT_IMPL(l, r) l ## r +#define MY_CAT(l, r) MY_CAT_IMPL(l, r) +#define MY_MACRO2(foo) int MY_CAT(awesome_, MY_CAT(foo, __COUNTER__)) = 0 +#define MY_MACRO3(foo) int MY_CAT(awesome_, foo) = 0 +MY_MACRO2(myglob); +MY_MACRO3(myglob); +// No suggestions should occur even though the resulting decl of awesome_myglob# +// or awesome_myglob are not entirely within a macro argument. + +} // namespace InlineNamespace +} // namespace FOO_NS template struct a { // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: invalid case style for struct 'a' From f2ade91a9fe7c222ea919748d30b74397911ecc8 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Wed, 10 Apr 2024 14:11:45 +0200 Subject: [PATCH 03/12] [mlir] Optimize getting properties on concrete ops (#88259) This makes retrieving properties on concrete operations faster by removing a branch when it is known that the operation must have properties. --- mlir/include/mlir/IR/OpDefinition.h | 2 +- mlir/include/mlir/IR/Operation.h | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index c177ae3594d11..2d1dee2303e8f 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -1965,7 +1965,7 @@ class Op : public OpState, public Traits... { if constexpr (!hasProperties()) return getEmptyProperties(); return *getOperation() - ->getPropertiesStorage() + ->getPropertiesStorageUnsafe() .template as *>(); } diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index 3ffd3517fe5a6..c52a6fcac10c1 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -895,8 +895,7 @@ class alignas(8) Operation final /// Returns the properties storage. 
OpaqueProperties getPropertiesStorage() { if (propertiesStorageSize) - return { - reinterpret_cast(getTrailingObjects())}; + return getPropertiesStorageUnsafe(); return {nullptr}; } OpaqueProperties getPropertiesStorage() const { @@ -905,6 +904,12 @@ class alignas(8) Operation final getTrailingObjects()))}; return {nullptr}; } + /// Returns the properties storage without checking whether properties are + /// present. + OpaqueProperties getPropertiesStorageUnsafe() { + return { + reinterpret_cast(getTrailingObjects())}; + } /// Return the properties converted to an attribute. /// This is expensive, and mostly useful when dealing with unregistered From 94ed57dab64ccb248a342a91957f390209c5c7ce Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 10 Apr 2024 13:30:29 +0100 Subject: [PATCH 04/12] [PhaseOrdering] Add test for #85551. Add test for missed hoisting of checks from std::span https://github.com/llvm/llvm-project/issues/85551 --- .../AArch64/hoist-runtime-checks.ll | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll index c6c9a52167d54..a140e17a0dd15 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll @@ -91,8 +91,151 @@ for.end: ; preds = %for.cond.cleanup ret i32 %9 } +%"class.std::__1::span" = type { ptr, i64 } +%"class.std::__1::__wrap_iter" = type { ptr } + +define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1, i64 noundef %n) { +; CHECK-LABEL: define dso_local noundef i32 @sum_prefix_with_sum( +; CHECK-SAME: ptr nocapture readonly [[S_COERCE0:%.*]], i64 [[S_COERCE1:%.*]], i64 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i64 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[DOTNOT_NOT:%.*]] = icmp ult i64 [[TMP0]], [[S_COERCE1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[SPAN_CHECKED_ACCESS_EXIT:%.*]] ] +; CHECK-NEXT: ret i32 [[RET_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[RET_06:%.*]] = phi i32 [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br i1 [[DOTNOT_NOT]], label [[SPAN_CHECKED_ACCESS_EXIT]], label [[COND_FALSE_I:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: cond.false.i: +; CHECK-NEXT: tail call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: span_checked_access.exit: +; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[I_07]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[RET_06]] +; CHECK-NEXT: [[INC]] = add nuw i64 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; +entry: + %s = alloca %"class.std::__1::span", align 8 + %n.addr = alloca i64, align 8 + %ret = alloca i32, align 4 + %i = alloca i64, align 8 + %0 = getelementptr inbounds { ptr, i64 }, ptr %s, i32 0, i32 0 + store ptr 
%s.coerce0, ptr %0, align 8 + %1 = getelementptr inbounds { ptr, i64 }, ptr %s, i32 0, i32 1 + store i64 %s.coerce1, ptr %1, align 8 + store i64 %n, ptr %n.addr, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %ret) #7 + store i32 0, ptr %ret, align 4 + call void @llvm.lifetime.start.p0(i64 8, ptr %i) #7 + store i64 0, ptr %i, align 8 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %2 = load i64, ptr %i, align 8 + %3 = load i64, ptr %n.addr, align 8 + %cmp = icmp ult i64 %2, %3 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 8, ptr %i) #7 + br label %for.end + +for.body: ; preds = %for.cond + %4 = load i64, ptr %i, align 8 + %call = call noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access(ptr noundef nonnull align 8 dereferenceable(16) %s, i64 noundef %4) #7 + %5 = load i32, ptr %call, align 4 + %6 = load i32, ptr %ret, align 4 + %add = add nsw i32 %6, %5 + store i32 %add, ptr %ret, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i64, ptr %i, align 8 + %inc = add i64 %7, 1 + store i64 %inc, ptr %i, align 8 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + %8 = load i32, ptr %ret, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr %ret) + ret i32 %8 +} + +define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access(ptr noundef nonnull align 8 dereferenceable(16) %this, i64 noundef %__idx) { +; CHECK-LABEL: define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access( +; CHECK-SAME: ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) [[THIS:%.*]], i64 noundef [[__IDX:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[__SIZE__I:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8 +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__SIZE__I]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], [[__IDX]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF0]] +; CHECK: cond.false: +; CHECK-NEXT: tail call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: cond.end: +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[__IDX]] +; CHECK-NEXT: ret ptr [[ARRAYIDX]] +; +entry: + %this.addr = alloca ptr, align 8 + %__idx.addr = alloca i64, align 8 + store ptr %this, ptr %this.addr, align 8 + store i64 %__idx, ptr %__idx.addr, align 8 + %this1 = load ptr, ptr %this.addr, align 8 + %0 = load i64, ptr %__idx.addr, align 8 + %call = call noundef i64 @span_access(ptr noundef nonnull align 8 dereferenceable(16) %this1) + %cmp = icmp ult i64 %0, %call + %conv = zext i1 %cmp to i64 + %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1) + %tobool = icmp ne i64 %expval, 0 + br i1 %tobool, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + call void @llvm.trap() + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %__data_ = getelementptr inbounds %"class.std::__1::span", ptr %this1, i32 0, i32 0 + %1 = load ptr, ptr %__data_, align 8 + %2 = load i64, ptr %__idx.addr, align 8 + %arrayidx = getelementptr inbounds i32, ptr %1, i64 %2 + ret ptr %arrayidx +} + +define hidden noundef i64 @span_access(ptr noundef nonnull align 8 dereferenceable(16) %this) { +; CHECK-LABEL: define hidden noundef i64 @span_access( +; CHECK-SAME: ptr nocapture noundef 
nonnull readonly align 8 dereferenceable(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SIZE_:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[__SIZE_]], align 8
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+entry:
+  %this.addr = alloca ptr, align 8
+  store ptr %this, ptr %this.addr, align 8
+  %this1 = load ptr, ptr %this.addr, align 8
+  %__size_ = getelementptr inbounds %"class.std::__1::span", ptr %this1, i32 0, i32 1
+  %0 = load i64, ptr %__size_, align 8
+  ret i64 %0
+}
+
 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
 declare void @llvm.trap()
 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 2000, i32 1}
+;.

From a2bdbc6f0da2a9d0cecb23c64bd20423b3fd0340 Mon Sep 17 00:00:00 2001
From: Jacek Caban
Date: Wed, 10 Apr 2024 14:37:18 +0200
Subject: [PATCH 05/12] [LLD][COFF] Check machine types in ICF::equalsConstant.
 (#88140)

Avoid replacing a chunk with one of a different machine type. It's mostly a
concern for ARM64X, where we don't want to merge aarch64 and arm64ec chunks,
but in theory it may also happen between arm64ec and x86_64 chunks.
---
 lld/COFF/ICF.cpp           |  2 +-
 lld/test/COFF/arm64x-icf.s | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/COFF/arm64x-icf.s

diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 013ffcfb3d5d1..b899a25324239 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -178,7 +178,7 @@ bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
          a->getSectionName() == b->getSectionName() &&
          a->header->SizeOfRawData == b->header->SizeOfRawData &&
          a->checksum == b->checksum && a->getContents() == b->getContents() &&
-         assocEquals(a, b);
+         a->getMachine() == b->getMachine() && assocEquals(a, b);
 }

 // Compare "moving" part of two sections, namely relocation targets.
diff --git a/lld/test/COFF/arm64x-icf.s b/lld/test/COFF/arm64x-icf.s
new file mode 100644
index 0000000000000..c8df21d3e4969
--- /dev/null
+++ b/lld/test/COFF/arm64x-icf.s
@@ -0,0 +1,37 @@
+// REQUIRES: aarch64
+// RUN: split-file %s %t.dir && cd %t.dir
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func-arm64ec.s -o func-arm64ec.obj
+// RUN: llvm-mc -filetype=obj -triple=aarch64-windows func-arm64.s -o func-arm64.obj
+// RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll func-arm64ec.obj func-arm64.obj
+// RUN: llvm-objdump -d out.dll | FileCheck %s
+
+// CHECK: 0000000180001000 <.text>:
+// CHECK-NEXT: 180001000: 52800020 mov w0, #0x1 // =1
+// CHECK-NEXT: 180001004: d65f03c0 ret
+// CHECK-NEXT: ...
+// CHECK-NEXT: 180002000: 52800020 mov w0, #0x1 // =1 +// CHECK-NEXT: 180002004: d65f03c0 ret + + +#--- func-arm64.s + .section .text,"xr",discard,func + .globl func + .p2align 2 +func: + mov w0, #1 + ret + + .data + .rva func + +#--- func-arm64ec.s + .section .text,"xr",discard,"#func" + .globl "#func" + .p2align 2 +"#func": + mov w0, #1 + ret + + .data + .rva "#func" From b47e439559ad03a1b32614f573aad66f145a634d Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 10 Apr 2024 08:38:49 -0400 Subject: [PATCH 06/12] Revert "[Clang][Sema] Fix crash when 'this' is used in a dependent class scope function template specialization that instantiates to a static member function" (#88264) Reverts llvm/llvm-project#87541 --- clang/docs/ReleaseNotes.rst | 2 - clang/include/clang/Sema/Sema.h | 8 +--- clang/lib/Sema/SemaExpr.cpp | 5 +-- clang/lib/Sema/SemaExprCXX.cpp | 44 ++++++------------- clang/lib/Sema/SemaExprMember.cpp | 42 +++++------------- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 8 ---- clang/lib/Sema/TreeTransform.h | 7 ++- ...ms-function-specialization-class-scope.cpp | 44 ++----------------- 8 files changed, 36 insertions(+), 124 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6bff80ed4d210..f5359afe1f099 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -520,8 +520,6 @@ Bug Fixes to C++ Support - Fix an issue caused by not handling invalid cases when substituting into the parameter mapping of a constraint. Fixes (#GH86757). - Fixed a bug that prevented member function templates of class templates declared with a deduced return type from being explicitly specialized for a given implicit instantiation of the class template. -- Fixed a crash when ``this`` is used in a dependent class scope function template specialization - that instantiates to a static member function. - Fix crash when inheriting from a cv-qualified type. Fixes: (`#35603 `_) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f311f9f374345..9769d36900664 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5439,8 +5439,7 @@ class Sema final : public SemaBase { ExprResult BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, - bool AcceptInvalidDecl = false, - bool NeedUnresolved = false); + bool AcceptInvalidDecl = false); ExprResult BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD = nullptr, @@ -6592,10 +6591,7 @@ class Sema final : public SemaBase { SourceLocation RParenLoc); //// ActOnCXXThis - Parse 'this' pointer. - ExprResult ActOnCXXThis(SourceLocation Loc); - - /// Check whether the type of 'this' is valid in the current context. - bool CheckCXXThisType(SourceLocation Loc, QualType Type); + ExprResult ActOnCXXThis(SourceLocation loc); /// Build a CXXThisExpr and mark it referenced in the current context. 
Expr *BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 45acbf197ea6b..594c11788f4e7 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3442,11 +3442,10 @@ static bool ShouldLookupResultBeMultiVersionOverload(const LookupResult &R) { ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, - bool AcceptInvalidDecl, - bool NeedUnresolved) { + bool AcceptInvalidDecl) { // If this is a single, fully-resolved result and we don't need ADL, // just build an ordinary singleton decl ref. - if (!NeedUnresolved && !NeedsADL && R.isSingleResult() && + if (!NeedsADL && R.isSingleResult() && !R.getAsSingle() && !ShouldLookupResultBeMultiVersionOverload(R)) return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(), diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 9822477260e59..7b9b8f149d9ed 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1415,42 +1415,26 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, } ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { - // C++20 [expr.prim.this]p1: - // The keyword this names a pointer to the object for which an - // implicit object member function is invoked or a non-static - // data member's initializer is evaluated. + /// C++ 9.3.2: In the body of a non-static member function, the keyword this + /// is a non-lvalue expression whose value is the address of the object for + /// which the function is called. QualType ThisTy = getCurrentThisType(); - if (CheckCXXThisType(Loc, ThisTy)) - return ExprError(); + if (ThisTy.isNull()) { + DeclContext *DC = getFunctionLevelDeclContext(); - return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); -} + if (const auto *Method = dyn_cast(DC); + Method && Method->isExplicitObjectMemberFunction()) { + return Diag(Loc, diag::err_invalid_this_use) << 1; + } -bool Sema::CheckCXXThisType(SourceLocation Loc, QualType Type) { - if (!Type.isNull()) - return false; + if (isLambdaCallWithExplicitObjectParameter(CurContext)) + return Diag(Loc, diag::err_invalid_this_use) << 1; - // C++20 [expr.prim.this]p3: - // If a declaration declares a member function or member function template - // of a class X, the expression this is a prvalue of type - // "pointer to cv-qualifier-seq X" wherever X is the current class between - // the optional cv-qualifier-seq and the end of the function-definition, - // member-declarator, or declarator. It shall not appear within the - // declaration of either a static member function or an explicit object - // member function of the current class (although its type and value - // category are defined within such member functions as they are within - // an implicit object member function). 
- DeclContext *DC = getFunctionLevelDeclContext(); - if (const auto *Method = dyn_cast(DC); - Method && Method->isExplicitObjectMemberFunction()) { - Diag(Loc, diag::err_invalid_this_use) << 1; - } else if (isLambdaCallWithExplicitObjectParameter(CurContext)) { - Diag(Loc, diag::err_invalid_this_use) << 1; - } else { - Diag(Loc, diag::err_invalid_this_use) << 0; + return Diag(Loc, diag::err_invalid_this_use) << 0; } - return true; + + return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); } Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 8cd2288d279cc..32998ae60eafe 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -61,10 +61,6 @@ enum IMAKind { /// The reference is a contextually-permitted abstract member reference. IMA_Abstract, - /// Whether the context is static is dependent on the enclosing template (i.e. - /// in a dependent class scope explicit specialization). - IMA_Dependent, - /// The reference may be to an unresolved using declaration and the /// context is not an instance method. IMA_Unresolved_StaticOrExplicitContext, @@ -95,18 +91,10 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, DeclContext *DC = SemaRef.getFunctionLevelDeclContext(); - bool couldInstantiateToStatic = false; - bool isStaticOrExplicitContext = SemaRef.CXXThisTypeOverride.isNull(); - - if (auto *MD = dyn_cast(DC)) { - if (MD->isImplicitObjectMemberFunction()) { - isStaticOrExplicitContext = false; - // A dependent class scope function template explicit specialization - // that is neither declared 'static' nor with an explicit object - // parameter could instantiate to a static or non-static member function. - couldInstantiateToStatic = MD->getDependentSpecializationInfo(); - } - } + bool isStaticOrExplicitContext = + SemaRef.CXXThisTypeOverride.isNull() && + (!isa(DC) || cast(DC)->isStatic() || + cast(DC)->isExplicitObjectMemberFunction()); if (R.isUnresolvableResult()) return isStaticOrExplicitContext ? 
IMA_Unresolved_StaticOrExplicitContext @@ -135,9 +123,6 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, if (Classes.empty()) return IMA_Static; - if (couldInstantiateToStatic) - return IMA_Dependent; - // C++11 [expr.prim.general]p12: // An id-expression that denotes a non-static data member or non-static // member function of a class can only be used: @@ -283,30 +268,27 @@ ExprResult Sema::BuildPossibleImplicitMemberExpr( const CXXScopeSpec &SS, SourceLocation TemplateKWLoc, LookupResult &R, const TemplateArgumentListInfo *TemplateArgs, const Scope *S, UnresolvedLookupExpr *AsULE) { - switch (IMAKind Classification = ClassifyImplicitMemberAccess(*this, R)) { + switch (ClassifyImplicitMemberAccess(*this, R)) { case IMA_Instance: + return BuildImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, true, S); + case IMA_Mixed: case IMA_Mixed_Unrelated: case IMA_Unresolved: - return BuildImplicitMemberExpr( - SS, TemplateKWLoc, R, TemplateArgs, - /*IsKnownInstance=*/Classification == IMA_Instance, S); + return BuildImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, false, + S); + case IMA_Field_Uneval_Context: Diag(R.getNameLoc(), diag::warn_cxx98_compat_non_static_member_use) << R.getLookupNameInfo().getName(); [[fallthrough]]; case IMA_Static: case IMA_Abstract: - case IMA_Dependent: case IMA_Mixed_StaticOrExplicitContext: case IMA_Unresolved_StaticOrExplicitContext: if (TemplateArgs || TemplateKWLoc.isValid()) - return BuildTemplateIdExpr(SS, TemplateKWLoc, R, /*RequiresADL=*/false, - TemplateArgs); - return AsULE ? AsULE - : BuildDeclarationNameExpr( - SS, R, /*NeedsADL=*/false, /*AcceptInvalidDecl=*/false, - /*NeedUnresolved=*/Classification == IMA_Dependent); + return BuildTemplateIdExpr(SS, TemplateKWLoc, R, false, TemplateArgs); + return AsULE ? AsULE : BuildDeclarationNameExpr(SS, R, false); case IMA_Error_StaticOrExplicitContext: case IMA_Error_Unrelated: diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 8248b10814fea..127a432367b95 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5093,14 +5093,6 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); - Qualifiers ThisTypeQuals; - CXXRecordDecl *ThisContext = nullptr; - if (CXXMethodDecl *Method = dyn_cast(Function)) { - ThisContext = Method->getParent(); - ThisTypeQuals = Method->getMethodQualifiers(); - } - CXXThisScopeRAII ThisScope(*this, ThisContext, ThisTypeQuals); - // Introduce a new scope where local variable instantiations will be // recorded, unless we're actually a member function within a local // class, in which case we need to merge our results with the parent diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 13d7b00430d52..d4d2fa61d65ea 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3307,13 +3307,12 @@ class TreeTransform { /// Build a new C++ "this" expression. /// - /// By default, performs semantic analysis to build a new "this" expression. - /// Subclasses may override this routine to provide different behavior. + /// By default, builds a new "this" expression without performing any + /// semantic analysis. Subclasses may override this routine to provide + /// different behavior. 
ExprResult RebuildCXXThisExpr(SourceLocation ThisLoc, QualType ThisType, bool isImplicit) { - if (getSema().CheckCXXThisType(ThisLoc, ThisType)) - return ExprError(); return getSema().BuildCXXThisExpr(ThisLoc, ThisType, isImplicit); } diff --git a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp index 6977623a0816e..dcab9bfaeabcb 100644 --- a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp +++ b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp @@ -1,6 +1,7 @@ -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -Wno-unused-value -verify %s -// RUN: %clang_cc1 -fms-extensions -fdelayed-template-parsing -fsyntax-only -Wno-unused-value -verify %s +// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify %s +// RUN: %clang_cc1 -fms-extensions -fdelayed-template-parsing -fsyntax-only -verify %s +// expected-no-diagnostics class A { public: template A(U p) {} @@ -75,42 +76,3 @@ struct S { int f<0>(int); }; } - -namespace UsesThis { - template - struct A { - int x; - - template - static void f(); - - template<> - void f() { - this->x; // expected-error {{invalid use of 'this' outside of a non-static member function}} - x; // expected-error {{invalid use of member 'x' in static member function}} - A::x; // expected-error {{invalid use of member 'x' in static member function}} - +x; // expected-error {{invalid use of member 'x' in static member function}} - +A::x; // expected-error {{invalid use of member 'x' in static member function}} - } - - template - void g(); - - template<> - void g() { - this->x; - x; - A::x; - +x; - +A::x; - } - - template - static auto h() -> A*; - - template<> - auto h() -> decltype(this); // expected-error {{'this' cannot be used in a static member function declaration}} - }; - - template struct A; // expected-note 2{{in instantiation of}} -} From 1ca01958310f2956abd72ece1652c3218bcf27e1 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 10 Apr 2024 08:41:22 -0400 Subject: [PATCH 07/12] [Clang][AST][NFC] Fix printing of dependent PackIndexTypes (#88146) Dependent `PackIndexType`s currently print the memory address of the index `Expr*` rather than pretty printing the expression. This patch fixes that. --- clang/lib/AST/TypePrinter.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 075c8aba11fcb..9602f448e9427 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1213,10 +1213,13 @@ void TypePrinter::printDecltypeBefore(const DecltypeType *T, raw_ostream &OS) { void TypePrinter::printPackIndexingBefore(const PackIndexingType *T, raw_ostream &OS) { - if (T->hasSelectedType()) + if (T->hasSelectedType()) { OS << T->getSelectedType(); - else - OS << T->getPattern() << "...[" << T->getIndexExpr() << "]"; + } else { + OS << T->getPattern() << "...["; + T->getIndexExpr()->printPretty(OS, nullptr, Policy); + OS << "]"; + } spaceBeforePlaceHolder(OS); } From 49ef12a08c4c7d7ae4765929e72fe2320a12b08c Mon Sep 17 00:00:00 2001 From: Johannes Reifferscheid Date: Wed, 10 Apr 2024 14:55:56 +0200 Subject: [PATCH 08/12] Fix complex log1p accuracy with large abs values. (#88260) This ports https://github.com/openxla/xla/pull/10503 by @pearu. The new implementation matches mpmath's results for most inputs, see caveats in the linked pull request. 
In addition to the filecheck test here, the accuracy was tested with XLA's complex_unary_op_test and its MLIR emitters. --- .../ComplexToStandard/ComplexToStandard.cpp | 50 ++++++++++--------- .../convert-to-standard.mlir | 48 +++++++++++------- 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 9c3c4d96a301e..0aa1de5fa5d9a 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -570,37 +570,39 @@ struct Log1pOpConversion : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { auto type = cast(adaptor.getComplex().getType()); auto elementType = cast(type.getElementType()); - arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); + arith::FastMathFlags fmf = op.getFastMathFlagsAttr().getValue(); mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); - Value real = b.create(elementType, adaptor.getComplex()); - Value imag = b.create(elementType, adaptor.getComplex()); + Value real = b.create(adaptor.getComplex()); + Value imag = b.create(adaptor.getComplex()); Value half = b.create(elementType, b.getFloatAttr(elementType, 0.5)); Value one = b.create(elementType, b.getFloatAttr(elementType, 1)); - Value two = b.create(elementType, - b.getFloatAttr(elementType, 2)); - - // log1p(a+bi) = .5*log((a+1)^2+b^2) + i*atan2(b, a + 1) - // log((a+1)+bi) = .5*log(a*a + 2*a + 1 + b*b) + i*atan2(b, a+1) - // log((a+1)+bi) = .5*log1p(a*a + 2*a + b*b) + i*atan2(b, a+1) - Value sumSq = b.create(real, real, fmf.getValue()); - sumSq = b.create( - sumSq, b.create(real, two, fmf.getValue()), - fmf.getValue()); - sumSq = b.create( - sumSq, b.create(imag, imag, fmf.getValue()), - fmf.getValue()); - Value logSumSq = - b.create(elementType, sumSq, fmf.getValue()); - Value resultReal = b.create(logSumSq, half, fmf.getValue()); - - Value realPlusOne = b.create(real, one, fmf.getValue()); - - Value resultImag = - b.create(elementType, imag, realPlusOne, fmf.getValue()); + Value realPlusOne = b.create(real, one, fmf); + Value absRealPlusOne = b.create(realPlusOne, fmf); + Value absImag = b.create(imag, fmf); + + Value maxAbs = b.create(absRealPlusOne, absImag, fmf); + Value minAbs = b.create(absRealPlusOne, absImag, fmf); + + Value maxAbsOfRealPlusOneAndImagMinusOne = b.create( + b.create(arith::CmpFPredicate::OGT, realPlusOne, absImag, + fmf), + real, b.create(maxAbs, one, fmf)); + Value minMaxRatio = b.create(minAbs, maxAbs, fmf); + Value logOfMaxAbsOfRealPlusOneAndImag = + b.create(maxAbsOfRealPlusOneAndImagMinusOne, fmf); + Value logOfSqrtPart = b.create( + b.create(minMaxRatio, minMaxRatio, fmf), fmf); + Value r = b.create( + b.create(half, logOfSqrtPart, fmf), + logOfMaxAbsOfRealPlusOneAndImag, fmf); + Value resultReal = b.create( + b.create(arith::CmpFPredicate::UNO, r, r, fmf), minAbs, + r); + Value resultImag = b.create(imag, realPlusOne, fmf); rewriter.replaceOpWithNewOp(op, type, resultReal, resultImag); return success(); diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir index f5d9499eadda4..43918904a09f4 100644 --- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir +++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir @@ -300,15 +300,22 @@ func.func @complex_log1p(%arg: complex) -> complex { // CHECK: %[[IMAG:.*]] = complex.im 
%[[ARG]] : complex // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32 -// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] : f32 -// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] : f32 -// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32 -// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32 -// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32 -// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] : f32 // CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] : f32 +// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 +// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] : f32 +// CHECK: %[[MAX_MINUS_ONE:.*]] = arith.subf %[[MAX]], %cst_0 : f32 +// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32 +// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] : f32 +// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] : f32 +// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] : f32 +// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] : f32 +// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] : f32 +// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] : f32 +// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex @@ -963,15 +970,22 @@ func.func @complex_log1p_with_fmf(%arg: complex) -> complex { // CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex // CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32 // CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32 -// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] fastmath : f32 -// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] fastmath : f32 -// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] fastmath : f32 -// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] fastmath : f32 -// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] fastmath : f32 -// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] fastmath : f32 -// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] fastmath : f32 -// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath : f32 +// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] fastmath : f32 +// CHECK: %[[ABS_REAL_PLUS_ONE:.*]] = math.absf %[[REAL_PLUS_ONE]] fastmath : f32 +// CHECK: %[[ABS_IMAG:.*]] = math.absf %[[IMAG]] fastmath : f32 +// CHECK: %[[MAX:.*]] = arith.maximumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MIN:.*]] = arith.minimumf %[[ABS_REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[CMPF:.*]] = arith.cmpf ogt, %[[REAL_PLUS_ONE]], %[[ABS_IMAG]] fastmath : f32 +// CHECK: %[[MAX_MINUS_ONE:.*]] 
= arith.subf %[[MAX]], %cst_0 fastmath : f32 +// CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %0, %[[MAX_MINUS_ONE]] : f32 +// CHECK: %[[MIN_MAX_RATIO:.*]] = arith.divf %[[MIN]], %[[MAX]] fastmath : f32 +// CHECK: %[[LOG_1:.*]] = math.log1p %[[SELECT]] fastmath : f32 +// CHECK: %[[RATIO_SQ:.*]] = arith.mulf %[[MIN_MAX_RATIO]], %[[MIN_MAX_RATIO]] fastmath : f32 +// CHECK: %[[LOG_SQ:.*]] = math.log1p %[[RATIO_SQ]] fastmath : f32 +// CHECK: %[[HALF_LOG_SQ:.*]] = arith.mulf %cst, %[[LOG_SQ]] fastmath : f32 +// CHECK: %[[R:.*]] = arith.addf %[[HALF_LOG_SQ]], %[[LOG_1]] fastmath : f32 +// CHECK: %[[ISNAN:.*]] = arith.cmpf uno, %[[R]], %[[R]] fastmath : f32 +// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[ISNAN]], %[[MIN]], %[[R]] : f32 // CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] fastmath : f32 // CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex // CHECK: return %[[RESULT]] : complex From 54a9f0007cb4f19d2e9df30405c5027229f5def0 Mon Sep 17 00:00:00 2001 From: annamthomas Date: Wed, 10 Apr 2024 09:02:23 -0400 Subject: [PATCH 09/12] [SCEV] Fix BinomialCoefficient Iteration to fit in W bits (#88010) BinomialCoefficient computes the value of W-bit IV at iteration It of a loop. When W is 1, we can call multiplicative inverse on 0 which triggers an assert since 1b76120. Since the arithmetic is supposed to wrap if It or K does not fit in W bits, do the truncation into W bits after we do the shift. Fixes #87798 --- llvm/lib/Analysis/ScalarEvolution.cpp | 6 +- llvm/test/Analysis/ScalarEvolution/pr87798.ll | 68 +++++++++++++++++++ 2 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Analysis/ScalarEvolution/pr87798.ll diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e030b9fc7dac4..9fcce797f5597 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -928,11 +928,9 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, APInt OddFactorial(W, 1); unsigned T = 1; for (unsigned i = 3; i <= K; ++i) { - APInt Mult(W, i); - unsigned TwoFactors = Mult.countr_zero(); + unsigned TwoFactors = countr_zero(i); T += TwoFactors; - Mult.lshrInPlace(TwoFactors); - OddFactorial *= Mult; + OddFactorial *= (i >> TwoFactors); } // We need at least W + T bits for the multiplication step diff --git a/llvm/test/Analysis/ScalarEvolution/pr87798.ll b/llvm/test/Analysis/ScalarEvolution/pr87798.ll new file mode 100644 index 0000000000000..acd445993e47b --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr87798.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -disable-output -passes='print' -verify-scev < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +; print is used to compute SCEVs for all values in the +; function. +; We should not crash on multiplicative inverse called within SCEV's binomial +; coefficient function. 
+
+define i32 @pr87798() {
+; CHECK-LABEL: 'pr87798'
+; CHECK-NEXT:  Classifying expressions for: @pr87798
+; CHECK-NEXT:    %phi = phi i32 [ 0, %bb ], [ %add4, %bb1 ]
+; CHECK-NEXT:    --> {0,+,0,+,0,+,2,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %phi2 = phi i32 [ 0, %bb ], [ %add, %bb1 ]
+; CHECK-NEXT:    --> {0,+,0,+,1}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %phi3 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
+; CHECK-NEXT:    --> {0,+,1}<%bb1> U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %add = add i32 %phi2, %phi3
+; CHECK-NEXT:    --> {0,+,1,+,1}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %mul = mul i32 %phi2, %phi3
+; CHECK-NEXT:    --> {0,+,0,+,2,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %add4 = add i32 %mul, %phi
+; CHECK-NEXT:    --> {0,+,0,+,2,+,5,+,3}<%bb1> U: full-set S: full-set Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %and = and i32 %phi, 1
+; CHECK-NEXT:    --> (zext i1 {false,+,false,+,false,+,false,+,true}<%bb1> to i32) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %add5 = add i32 %phi3, 1
+; CHECK-NEXT:    --> {1,+,1}<%bb1> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %bb1: Computable }
+; CHECK-NEXT:    %phi9 = phi i32 [ %and, %bb1 ]
+; CHECK-NEXT:    --> (zext i1 {false,+,false,+,false,+,false,+,true}<%bb1> to i32) U: [0,2) S: [0,2) --> 0 U: [0,1) S: [0,1)
+; CHECK-NEXT:    %zext = zext i32 %phi9 to i64
+; CHECK-NEXT:    --> poison U: full-set S: full-set
+; CHECK-NEXT:  Determining loop execution counts for: @pr87798
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+; CHECK-NEXT:  Loop %bb1: backedge-taken count is i1 false
+; CHECK-NEXT:  Loop %bb1: constant max backedge-taken count is i1 false
+; CHECK-NEXT:  Loop %bb1: symbolic max backedge-taken count is i1 false
+; CHECK-NEXT:  Loop %bb1: Trip multiple is 1
+;
+bb:
+  br label %bb1
+
+bb1: ; preds = %bb1, %bb
+  %phi = phi i32 [ 0, %bb ], [ %add4, %bb1 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %add, %bb1 ]
+  %phi3 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
+  %add = add i32 %phi2, %phi3
+  %mul = mul i32 %phi2, %phi3
+  %add4 = add i32 %mul, %phi
+  %and = and i32 %phi, 1
+  %add5 = add i32 %phi3, 1
+  br i1 true, label %preheader, label %bb1
+
+preheader: ; preds = %bb1
+  %phi9 = phi i32 [ %and, %bb1 ]
+  br label %loop
+
+loop: ; preds = %preheader, %loop
+  br label %loop
+
+bb7: ; No predecessors!
+  %zext = zext i32 %phi9 to i64
+  ret i32 0
+}

From 938a73422e0b964eba16f272acdfae1d0281772c Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Thu, 4 Apr 2024 13:30:57 -0700
Subject: [PATCH 10/12] [SLP][NFC] Walk over entries, not single values.

It is better to walk over SLP nodes rather than single values. Matching a
value to a node is not a 1-to-1 relation; one value may be part of several
nodes, and the compiler may pick the wrong node when trying to map it.
No such issues have been detected so far, but they may appear in the future.
--- .../Transforms/Vectorize/SLPVectorizer.cpp | 339 +++++++++--------- 1 file changed, 167 insertions(+), 172 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c3dcf73b0b762..22ef9b5fb994e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2325,19 +2325,17 @@ class BoUpSLP { ~BoUpSLP(); private: - /// Determine if a vectorized value \p V in can be demoted to - /// a smaller type with a truncation. We collect the values that will be - /// demoted in ToDemote and additional roots that require investigating in - /// Roots. - /// \param DemotedConsts list of Instruction/OperandIndex pairs that are - /// constant and to be demoted. Required to correctly identify constant nodes - /// to be demoted. - bool collectValuesToDemote( - Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth, - SmallVectorImpl &ToDemote, - DenseMap> &DemotedConsts, - DenseSet &Visited, unsigned &MaxDepthLevel, - bool &IsProfitableToDemote, bool IsTruncRoot) const; + /// Determine if a node \p E in can be demoted to a smaller type with a + /// truncation. We collect the entries that will be demoted in ToDemote. + /// \param E Node for analysis + /// \param ToDemote indices of the nodes to be demoted. + bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot, + unsigned &BitWidth, + SmallVectorImpl &ToDemote, + DenseSet &Visited, + unsigned &MaxDepthLevel, + bool &IsProfitableToDemote, + bool IsTruncRoot) const; /// Check if the operands on the edges \p Edges of the \p UserTE allows /// reordering (i.e. the operands can be reordered because they have only one @@ -14126,20 +14124,17 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { return Width; } -// Determine if a value V in a vectorizable expression Expr can be demoted to a -// smaller type with a truncation. We collect the values that will be demoted -// in ToDemote and additional roots that require investigating in Roots. bool BoUpSLP::collectValuesToDemote( - Value *V, bool IsProfitableToDemoteRoot, unsigned &BitWidth, - SmallVectorImpl &ToDemote, - DenseMap> &DemotedConsts, - DenseSet &Visited, unsigned &MaxDepthLevel, - bool &IsProfitableToDemote, bool IsTruncRoot) const { + const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth, + SmallVectorImpl &ToDemote, DenseSet &Visited, + unsigned &MaxDepthLevel, bool &IsProfitableToDemote, + bool IsTruncRoot) const { // We can always demote constants. 
- if (isa(V)) + if (all_of(E.Scalars, IsaPred)) return true; - if (DL->getTypeSizeInBits(V->getType()) == BitWidth) { + unsigned OrigBitWidth = DL->getTypeSizeInBits(E.Scalars.front()->getType()); + if (OrigBitWidth == BitWidth) { MaxDepthLevel = 1; return true; } @@ -14150,7 +14145,6 @@ bool BoUpSLP::collectValuesToDemote( auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool { if (MultiNodeScalars.contains(V)) return false; - uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); if (OrigBitWidth > BitWidth) { APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL))) @@ -14168,47 +14162,50 @@ bool BoUpSLP::collectValuesToDemote( BitWidth = std::max(BitWidth, BitWidth1); return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2); }; - auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) { + using namespace std::placeholders; + auto FinalAnalysis = [&]() { if (!IsProfitableToDemote) return false; - return (ITE && ITE->UserTreeIndices.size() > 1) || - IsPotentiallyTruncated(V, BitWidth); + bool Res = all_of( + E.Scalars, std::bind(IsPotentiallyTruncated, _1, std::ref(BitWidth))); + // Gather demoted constant operands. + if (Res && E.State == TreeEntry::NeedToGather && + all_of(E.Scalars, IsaPred)) + ToDemote.push_back(E.Idx); + return Res; }; // TODO: improve handling of gathered values and others. - auto *I = dyn_cast(V); - const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr; - if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) || - all_of(I->users(), [&](User *U) { - return isa(U) && !getTreeEntry(U); + if (E.State == TreeEntry::NeedToGather || !Visited.insert(&E).second || + any_of(E.Scalars, [&](Value *V) { + return all_of(V->users(), [&](User *U) { + return isa(U) && !getTreeEntry(U); + }); })) return FinalAnalysis(); - if (!all_of(I->users(), - [=](User *U) { - return getTreeEntry(U) || - (UserIgnoreList && UserIgnoreList->contains(U)) || - (U->getType()->isSized() && - !U->getType()->isScalableTy() && - DL->getTypeSizeInBits(U->getType()) <= BitWidth); - }) && - !IsPotentiallyTruncated(I, BitWidth)) + if (any_of(E.Scalars, [&](Value *V) { + return !all_of(V->users(), [=](User *U) { + return getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || + (U->getType()->isSized() && !U->getType()->isScalableTy() && + DL->getTypeSizeInBits(U->getType()) <= BitWidth); + }) && !IsPotentiallyTruncated(V, BitWidth); + })) return false; - unsigned Start = 0; - unsigned End = I->getNumOperands(); - - auto ProcessOperands = [&](ArrayRef Operands, bool &NeedToExit) { + auto ProcessOperands = [&](ArrayRef Operands, + bool &NeedToExit) { NeedToExit = false; unsigned InitLevel = MaxDepthLevel; - for (Value *IncValue : Operands) { + for (const TreeEntry *Op : Operands) { unsigned Level = InitLevel; - if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth, - ToDemote, DemotedConsts, Visited, Level, - IsProfitableToDemote, IsTruncRoot)) { + if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth, + ToDemote, Visited, Level, IsProfitableToDemote, + IsTruncRoot)) { if (!IsProfitableToDemote) return false; NeedToExit = true; - if (!FinalAnalysis(ITE)) + if (!FinalAnalysis()) return false; continue; } @@ -14220,7 +14217,6 @@ bool BoUpSLP::collectValuesToDemote( [&](function_ref Checker, bool &NeedToExit) { // Try all bitwidth < OrigBitWidth. 
NeedToExit = false; - uint32_t OrigBitWidth = DL->getTypeSizeInBits(I->getType()); unsigned BestFailBitwidth = 0; for (; BitWidth < OrigBitWidth; BitWidth *= 2) { if (Checker(BitWidth, OrigBitWidth)) @@ -14241,18 +14237,20 @@ bool BoUpSLP::collectValuesToDemote( return false; }; auto TryProcessInstruction = - [&](Instruction *I, const TreeEntry &ITE, unsigned &BitWidth, - ArrayRef Operands = std::nullopt, + [&](unsigned &BitWidth, + ArrayRef Operands = std::nullopt, function_ref Checker = {}) { if (Operands.empty()) { if (!IsTruncRoot) MaxDepthLevel = 1; - (void)IsPotentiallyTruncated(V, BitWidth); + (void)for_each(E.Scalars, std::bind(IsPotentiallyTruncated, _1, + std::ref(BitWidth))); } else { // Several vectorized uses? Check if we can truncate it, otherwise - // exit. - if (ITE.UserTreeIndices.size() > 1 && - !IsPotentiallyTruncated(I, BitWidth)) + if (E.UserTreeIndices.size() > 1 && + !all_of(E.Scalars, std::bind(IsPotentiallyTruncated, _1, + std::ref(BitWidth)))) return false; bool NeedToExit = false; if (Checker && !AttemptCheckBitwidth(Checker, NeedToExit)) @@ -14266,26 +14264,22 @@ bool BoUpSLP::collectValuesToDemote( } ++MaxDepthLevel; - // Gather demoted constant operands. - for (unsigned Idx : seq(Start, End)) - if (isa(I->getOperand(Idx))) - DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx); - // Record the value that we can demote. - ToDemote.push_back(V); + // Record the entry that we can demote. + ToDemote.push_back(E.Idx); return IsProfitableToDemote; }; - switch (I->getOpcode()) { + switch (E.getOpcode()) { // We can always demote truncations and extensions. Since truncations can // seed additional demotion, we save the truncated value. case Instruction::Trunc: if (IsProfitableToDemoteRoot) IsProfitableToDemote = true; - return TryProcessInstruction(I, *ITE, BitWidth); + return TryProcessInstruction(BitWidth); case Instruction::ZExt: case Instruction::SExt: IsProfitableToDemote = true; - return TryProcessInstruction(I, *ITE, BitWidth); + return TryProcessInstruction(BitWidth); // We can demote certain binary operations if we can demote both of their // operands. @@ -14295,112 +14289,128 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return TryProcessInstruction(I, *ITE, BitWidth, - {I->getOperand(0), I->getOperand(1)}); + return TryProcessInstruction( + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}); } case Instruction::Shl: { // If we are truncating the result of this SHL, and if it's a shift of an // inrange amount, we can always perform a SHL in a smaller type. auto ShlChecker = [&](unsigned BitWidth, unsigned) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - return AmtKnownBits.getMaxValue().ult(BitWidth); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + return AmtKnownBits.getMaxValue().ult(BitWidth); + }); }; return TryProcessInstruction( - I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, ShlChecker); + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, ShlChecker); } case Instruction::LShr: { // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. 
auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return AmtKnownBits.getMaxValue().ult(BitWidth) && - MaskedValueIsZero(I->getOperand(0), ShiftedBits, - SimplifyQuery(*DL)); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return AmtKnownBits.getMaxValue().ult(BitWidth) && + MaskedValueIsZero(I->getOperand(0), ShiftedBits, + SimplifyQuery(*DL)); + }); }; return TryProcessInstruction( - I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, LShrChecker); + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, + LShrChecker); } case Instruction::AShr: { // If this is a truncate of an arithmetic shr, we can truncate it to a // smaller ashr iff we know that all the bits from the sign bit of the // original type and the sign bit of the truncate type are similar. auto AShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - unsigned ShiftedBits = OrigBitWidth - BitWidth; - return AmtKnownBits.getMaxValue().ult(BitWidth) && - ShiftedBits < - ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + unsigned ShiftedBits = OrigBitWidth - BitWidth; + return AmtKnownBits.getMaxValue().ult(BitWidth) && + ShiftedBits < ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, + nullptr, DT); + }); }; return TryProcessInstruction( - I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, AShrChecker); + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, + AShrChecker); } case Instruction::UDiv: case Instruction::URem: { // UDiv and URem can be truncated if all the truncated bits are zero. auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) { assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && - MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + }); }; - return TryProcessInstruction(I, *ITE, BitWidth, - {I->getOperand(0), I->getOperand(1)}, Checker); + return TryProcessInstruction( + BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, Checker); } // We can demote selects if we can demote their true and false values. case Instruction::Select: { - Start = 1; - auto *SI = cast(I); - return TryProcessInstruction(I, *ITE, BitWidth, - {SI->getTrueValue(), SI->getFalseValue()}); + return TryProcessInstruction( + BitWidth, {getOperandEntry(&E, 1), getOperandEntry(&E, 2)}); } // We can demote phis if we can demote all their incoming operands. Note that // we don't need to worry about cycles since we ensure single use above. 
case Instruction::PHI: { - PHINode *PN = cast(I); - SmallVector Ops(PN->incoming_values().begin(), - PN->incoming_values().end()); - return TryProcessInstruction(I, *ITE, BitWidth, Ops); + const unsigned NumOps = E.getNumOperands(); + SmallVector Ops(NumOps); + transform(seq(0, NumOps), Ops.begin(), + std::bind(&BoUpSLP::getOperandEntry, this, &E, _1)); + + return TryProcessInstruction(BitWidth, Ops); } case Instruction::Call: { - auto *IC = dyn_cast(I); + auto *IC = dyn_cast(E.getMainOp()); if (!IC) break; Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI); if (ID != Intrinsic::abs && ID != Intrinsic::smin && ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax) break; - SmallVector Operands(1, I->getOperand(0)); + SmallVector Operands(1, getOperandEntry(&E, 0)); function_ref CallChecker; auto CompChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); - if (ID == Intrinsic::umin || ID == Intrinsic::umax) { - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && - MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); - } - assert((ID == Intrinsic::smin || ID == Intrinsic::smax) && - "Expected min/max intrinsics only."); - unsigned SignBits = OrigBitWidth - BitWidth; - return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, - nullptr, DT) && - SignBits <= - ComputeNumSignBits(I->getOperand(1), *DL, 0, AC, nullptr, DT); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + if (ID == Intrinsic::umin || ID == Intrinsic::umax) { + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, + SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + } + assert((ID == Intrinsic::smin || ID == Intrinsic::smax) && + "Expected min/max intrinsics only."); + unsigned SignBits = OrigBitWidth - BitWidth; + return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, + nullptr, DT) && + SignBits <= ComputeNumSignBits(I->getOperand(1), *DL, 0, AC, + nullptr, DT); + }); }; - End = 1; if (ID != Intrinsic::abs) { - Operands.push_back(I->getOperand(1)); - End = 2; + Operands.push_back(getOperandEntry(&E, 1)); CallChecker = CompChecker; } InstructionCost BestCost = std::numeric_limits::max(); unsigned BestBitWidth = BitWidth; - unsigned VF = ITE->Scalars.size(); + unsigned VF = E.Scalars.size(); // Choose the best bitwidth based on cost estimations. auto Checker = [&](unsigned BitWidth, unsigned) { unsigned MinBW = PowerOf2Ceil(BitWidth); @@ -14419,7 +14429,7 @@ bool BoUpSLP::collectValuesToDemote( [[maybe_unused]] bool NeedToExit; (void)AttemptCheckBitwidth(Checker, NeedToExit); BitWidth = BestBitWidth; - return TryProcessInstruction(I, *ITE, BitWidth, Operands, CallChecker); + return TryProcessInstruction(BitWidth, Operands, CallChecker); } // Otherwise, conservatively give up. @@ -14473,26 +14483,27 @@ void BoUpSLP::computeMinimumValueSizes() { ++NodeIdx; } - // Analyzed in reduction already and not profitable - exit. + // Analyzed the reduction already and not profitable - exit. 
if (AnalyzedMinBWVals.contains(VectorizableTree[NodeIdx]->Scalars.front())) return; - SmallVector ToDemote; - DenseMap> DemotedConsts; - auto ComputeMaxBitWidth = [&](ArrayRef TreeRoot, unsigned VF, - bool IsTopRoot, bool IsProfitableToDemoteRoot, - unsigned Opcode, unsigned Limit, - bool IsTruncRoot, bool IsSignedCmp) { + SmallVector ToDemote; + auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot, + bool IsProfitableToDemoteRoot, unsigned Opcode, + unsigned Limit, bool IsTruncRoot, + bool IsSignedCmp) { ToDemote.clear(); - auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); + unsigned VF = E.getVectorFactor(); + auto *TreeRootIT = dyn_cast(E.Scalars.front()->getType()); if (!TreeRootIT || !Opcode) return 0u; - if (AnalyzedMinBWVals.contains(TreeRoot.front())) + if (any_of(E.Scalars, + [&](Value *V) { return AnalyzedMinBWVals.contains(V); })) return 0u; - unsigned NumParts = TTI->getNumberOfParts( - FixedVectorType::get(TreeRoot.front()->getType(), VF)); + unsigned NumParts = + TTI->getNumberOfParts(FixedVectorType::get(TreeRootIT, VF)); // The maximum bit width required to represent all the values that can be // demoted without loss of precision. It would be safe to truncate the roots @@ -14505,14 +14516,14 @@ void BoUpSLP::computeMinimumValueSizes() { // True. // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - bool IsKnownPositive = !IsSignedCmp && all_of(TreeRoot, [&](Value *R) { + bool IsKnownPositive = !IsSignedCmp && all_of(E.Scalars, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); // We first check if all the bits of the roots are demanded. If they're not, // we can truncate the roots to this narrower type. - for (auto *Root : TreeRoot) { + for (Value *Root : E.Scalars) { unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT); TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType()); unsigned BitWidth1 = NumTypeBits - NumSignBits; @@ -14557,23 +14568,22 @@ void BoUpSLP::computeMinimumValueSizes() { // Conservatively determine if we can actually truncate the roots of the // expression. Collect the values that can be demoted in ToDemote and // additional roots that require investigating in Roots. - for (auto *Root : TreeRoot) { - DenseSet Visited; - unsigned MaxDepthLevel = IsTruncRoot ? Limit : 1; - bool NeedToDemote = IsProfitableToDemote; - - if (!collectValuesToDemote(Root, IsProfitableToDemoteRoot, MaxBitWidth, - ToDemote, DemotedConsts, Visited, - MaxDepthLevel, NeedToDemote, IsTruncRoot) || - (MaxDepthLevel <= Limit && - !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && - (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || - DL->getTypeSizeInBits(Root->getType()) / - DL->getTypeSizeInBits( - cast(Root)->getOperand(0)->getType()) > - 2))))) - return 0u; - } + DenseSet Visited; + unsigned MaxDepthLevel = IsTruncRoot ? Limit : 1; + bool NeedToDemote = IsProfitableToDemote; + + if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth, + ToDemote, Visited, MaxDepthLevel, NeedToDemote, + IsTruncRoot) || + (MaxDepthLevel <= Limit && + !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && + (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) || + DL->getTypeSizeInBits(TreeRootIT) / + DL->getTypeSizeInBits(cast(E.Scalars.front()) + ->getOperand(0) + ->getType()) > + 2))))) + return 0u; // Round MaxBitWidth up to the next power-of-two. 
MaxBitWidth = bit_ceil(MaxBitWidth); @@ -14624,8 +14634,8 @@ void BoUpSLP::computeMinimumValueSizes() { VectorizableTree.front()->Scalars.front()->getType())) Limit = 3; unsigned MaxBitWidth = ComputeMaxBitWidth( - TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, - IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot, IsSignedCmp); + *VectorizableTree[NodeIdx].get(), IsTopRoot, IsProfitableToDemoteRoot, + Opcode, Limit, IsTruncRoot, IsSignedCmp); if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) ReductionBitWidth = bit_ceil(MaxBitWidth); @@ -14634,13 +14644,15 @@ void BoUpSLP::computeMinimumValueSizes() { } for (unsigned Idx : RootDemotes) { - Value *V = VectorizableTree[Idx]->Scalars.front(); - uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); - if (OrigBitWidth > MaxBitWidth) { - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, MaxBitWidth); - if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL))) - ToDemote.push_back(V); - } + if (all_of(VectorizableTree[Idx]->Scalars, [&](Value *V) { + uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType()); + if (OrigBitWidth > MaxBitWidth) { + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, MaxBitWidth); + return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)); + } + return false; + })) + ToDemote.push_back(Idx); } RootDemotes.clear(); IsTopRoot = false; @@ -14687,9 +14699,8 @@ void BoUpSLP::computeMinimumValueSizes() { // Finally, map the values we can demote to the maximum bit with we // computed. - for (Value *Scalar : ToDemote) { - TreeEntry *TE = getTreeEntry(Scalar); - assert(TE && "Expected vectorized scalar."); + for (unsigned Idx : ToDemote) { + TreeEntry *TE = VectorizableTree[Idx].get(); if (MinBWs.contains(TE)) continue; bool IsSigned = TE->getOpcode() == Instruction::SExt || @@ -14697,22 +14708,6 @@ void BoUpSLP::computeMinimumValueSizes() { return !isKnownNonNegative(R, SimplifyQuery(*DL)); }); MinBWs.try_emplace(TE, MaxBitWidth, IsSigned); - const auto *I = cast(Scalar); - auto DCIt = DemotedConsts.find(I); - if (DCIt != DemotedConsts.end()) { - for (unsigned Idx : DCIt->getSecond()) { - // Check that all instructions operands are demoted. - const TreeEntry *CTE = getOperandEntry(TE, Idx); - if (all_of(TE->Scalars, - [&](Value *V) { - auto SIt = DemotedConsts.find(cast(V)); - return SIt != DemotedConsts.end() && - is_contained(SIt->getSecond(), Idx); - }) || - all_of(CTE->Scalars, IsaPred)) - MinBWs.try_emplace(CTE, MaxBitWidth, IsSigned); - } - } } } } From 50d368aee981738cd05f3d16f5d1cfc122c9b0ab Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Apr 2024 09:07:43 -0500 Subject: [PATCH 11/12] [LinkerWrapper] Relax ordering of static libraries for offloading (#87532) Summary: The linker wrapper attempts to maintain consistent semantics with existing host invocations. Static libraries by default only extract members if they resolve non-weak symbols that remain undefined. However, different linkers attach different meanings to input ordering. The ld.bfd linker requires static libraries to appear after the objects that reference their symbols, while `ld.lld` relaxes this rule. The linker wrapper went with the former as it was the easier solution; however, this has caused a lot of issues, as I've had to explain this rule to several people, and it also makes it difficult to include things like `libc` in the OpenMP runtime because it would sometimes be linked before or after the objects that need it.
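To make the ordering issue concrete, here is an illustrative sketch of the two invocation orders; `libfoo` is assumed to be a static library carrying offloading device code that `main.c` references (the library name is made up for illustration and is not taken from this patch):

```console
# Old, ld.bfd-style rule: the archive only contributed device code if it
# appeared after the object that references its symbols.
$ clang main.c -lfoo -fopenmp --offload-arch=native   # archive after object: extracted as expected
$ clang -lfoo main.c -fopenmp --offload-arch=native   # archive first: previously could leave device symbols unresolved
```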
This patch reworks the logic to more or less perform the following steps for static libraries. 1. Split library / object inputs. 2. Include every object input and record its undefined symbols. 3. Repeatedly try to extract static libraries to resolve these symbols. If a file is extracted, we need to check every library again to resolve any new undefined symbols. This allows the following to work and will cause fewer issues when replacing HIP, which does `--whole-archive`, so it's very likely the old logic would regress. ```console $ clang -lfoo main.c -fopenmp --offload-arch=native ``` --- clang/test/Driver/linker-wrapper-libs.c | 4 + .../ClangLinkerWrapper.cpp | 121 ++++++++++++------ 2 files changed, 83 insertions(+), 42 deletions(-) diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c index 9a78200d7d3cf..119e306857187 100644 --- a/clang/test/Driver/linker-wrapper-libs.c +++ b/clang/test/Driver/linker-wrapper-libs.c @@ -44,6 +44,8 @@ int bar() { return weak; } // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES // LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o @@ -66,6 +68,8 @@ int bar() { return weak; } // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \ +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL // LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 73e695a67093e..a1879fc7712dc 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1448,9 +1448,9 @@ getDeviceInput(const ArgList &Args) { StringSaver Saver(Alloc); // Try to extract device code from the linker input files. - DenseMap> InputFiles; - DenseMap> Syms; bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false; + SmallVector ObjectFilesToExtract; + SmallVector ArchiveFilesToExtract; for (const opt::Arg *Arg : Args.filtered( OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { if (Arg->getOption().matches(OPT_whole_archive) || @@ -1486,50 +1486,87 @@ getDeviceInput(const ArgList &Args) { if (Error Err = extractOffloadBinaries(Buffer, Binaries)) return std::move(Err); - // We only extract archive members that are needed. - bool IsArchive = identify_magic(Buffer.getBuffer()) == file_magic::archive; - bool Extracted = true; - while (Extracted) { - Extracted = false; - for (OffloadFile &Binary : Binaries) { - // If the binary was previously extracted it will be set to null.
- if (!Binary.getBinary()) + for (auto &OffloadFile : Binaries) { + if (identify_magic(Buffer.getBuffer()) == file_magic::archive && + !WholeArchive) + ArchiveFilesToExtract.emplace_back(std::move(OffloadFile)); + else + ObjectFilesToExtract.emplace_back(std::move(OffloadFile)); + } + } + + // Link all standard input files and update the list of symbols. + DenseMap> InputFiles; + DenseMap> Syms; + for (OffloadFile &Binary : ObjectFilesToExtract) { + if (!Binary.getBinary()) + continue; + + SmallVector CompatibleTargets = {Binary}; + for (const auto &[ID, Input] : InputFiles) + if (object::areTargetsCompatible(Binary, ID)) + CompatibleTargets.emplace_back(ID); + + for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { + Expected ExtractOrErr = getSymbols( + Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(), + /*IsArchive=*/false, Saver, Syms[ID]); + if (!ExtractOrErr) + return ExtractOrErr.takeError(); + + // If another target needs this binary it must be copied instead. + if (Index == CompatibleTargets.size() - 1) + InputFiles[ID].emplace_back(std::move(Binary)); + else + InputFiles[ID].emplace_back(Binary.copy()); + } + } + + // Archive members only extract if they define needed symbols. We do this + // after every regular input file so that libraries may be included out of + // order. This follows 'ld.lld' semantics which are more lenient. + bool Extracted = true; + while (Extracted) { + Extracted = false; + for (OffloadFile &Binary : ArchiveFilesToExtract) { + // If the binary was previously extracted it will be set to null. + if (!Binary.getBinary()) + continue; + + SmallVector CompatibleTargets = {Binary}; + for (const auto &[ID, Input] : InputFiles) + if (object::areTargetsCompatible(Binary, ID)) + CompatibleTargets.emplace_back(ID); + + for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { + // Only extract an if we have an an object matching this target. + if (!InputFiles.count(ID)) continue; - SmallVector CompatibleTargets = {Binary}; - for (const auto &[ID, Input] : InputFiles) - if (object::areTargetsCompatible(Binary, ID)) - CompatibleTargets.emplace_back(ID); - - for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { - // Only extract an if we have an an object matching this target. - if (IsArchive && !WholeArchive && !InputFiles.count(ID)) - continue; - - Expected ExtractOrErr = getSymbols( - Binary.getBinary()->getImage(), - Binary.getBinary()->getOffloadKind(), IsArchive, Saver, Syms[ID]); - if (!ExtractOrErr) - return ExtractOrErr.takeError(); - - Extracted = !WholeArchive && *ExtractOrErr; - - // Skip including the file if it is an archive that does not resolve - // any symbols. - if (IsArchive && !WholeArchive && !Extracted) - continue; - - // If another target needs this binary it must be copied instead. - if (Index == CompatibleTargets.size() - 1) - InputFiles[ID].emplace_back(std::move(Binary)); - else - InputFiles[ID].emplace_back(Binary.copy()); - } + Expected ExtractOrErr = + getSymbols(Binary.getBinary()->getImage(), + Binary.getBinary()->getOffloadKind(), /*IsArchive=*/true, + Saver, Syms[ID]); + if (!ExtractOrErr) + return ExtractOrErr.takeError(); - // If we extracted any files we need to check all the symbols again. - if (Extracted) - break; + Extracted = *ExtractOrErr; + + // Skip including the file if it is an archive that does not resolve + // any symbols. + if (!Extracted) + continue; + + // If another target needs this binary it must be copied instead. 
+ if (Index == CompatibleTargets.size() - 1) + InputFiles[ID].emplace_back(std::move(Binary)); + else + InputFiles[ID].emplace_back(Binary.copy()); } + + // If we extracted any files we need to check all the symbols again. + if (Extracted) + break; } } From 6b35cbee3f577d9ee55f7277affa0fe194859b25 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Wed, 10 Apr 2024 18:09:39 +0400 Subject: [PATCH 12/12] [clang] Introduce `SemaSYCL` (#88086) This patch moves SYCL-related `Sema` functions into new `SemaSYCL` class, following the recent example of OpenACC and HLSL. This is a part of the effort to split `Sema`. Additional context can be found in #82217, #84184, #87634. --- clang/include/clang/Sema/Sema.h | 55 ++++-------------------- clang/include/clang/Sema/SemaSYCL.h | 65 +++++++++++++++++++++++++++++ clang/lib/Parse/ParseExpr.cpp | 5 ++- clang/lib/Sema/Sema.cpp | 6 ++- clang/lib/Sema/SemaExpr.cpp | 22 ---------- clang/lib/Sema/SemaSYCL.cpp | 52 +++++++++++++++++------ clang/lib/Sema/TreeTransform.h | 4 +- 7 files changed, 121 insertions(+), 88 deletions(-) create mode 100644 clang/include/clang/Sema/SemaSYCL.h diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9769d36900664..e3e255a0dd76f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -184,6 +184,7 @@ class PseudoObjectExpr; class QualType; class SemaHLSL; class SemaOpenACC; +class SemaSYCL; class StandardConversionSequence; class Stmt; class StringLiteral; @@ -467,7 +468,6 @@ class Sema final : public SemaBase { // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp) // 38. CUDA (SemaCUDA.cpp) // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp) - // 40. SYCL Constructs (SemaSYCL.cpp) /// \name Semantic Analysis /// Implementations are in Sema.cpp @@ -974,6 +974,11 @@ class Sema final : public SemaBase { return *OpenACCPtr; } + SemaSYCL &SYCL() { + assert(SYCLPtr); + return *SYCLPtr; + } + protected: friend class Parser; friend class InitializationSequence; @@ -1006,6 +1011,7 @@ class Sema final : public SemaBase { std::unique_ptr HLSLPtr; std::unique_ptr OpenACCPtr; + std::unique_ptr SYCLPtr; ///@} @@ -5455,15 +5461,6 @@ class Sema final : public SemaBase { ExprResult ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind); ExprResult ActOnIntegerConstant(SourceLocation Loc, uint64_t Val); - ExprResult BuildSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - TypeSourceInfo *TSI); - ExprResult ActOnSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - ParsedType ParsedTy); - bool CheckLoopHintExpr(Expr *E, SourceLocation Loc); ExprResult ActOnNumericConstant(const Token &Tok, Scope *UDLScope = nullptr); @@ -14516,44 +14513,6 @@ class Sema final : public SemaBase { OpenMPDirectiveKind CancelRegion); ///@} - - // - // - // ------------------------------------------------------------------------- - // - // - - /// \name SYCL Constructs - /// Implementations are in SemaSYCL.cpp - ///@{ - -public: - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current - /// context is "used as device code". - /// - /// - If CurLexicalContext is a kernel function or it is known that the - /// function will be emitted for the device, emits the diagnostics - /// immediately. 
- /// - If CurLexicalContext is a function and we are compiling - /// for the device, but we don't know that this function will be codegen'ed - /// for devive yet, creates a diagnostic which is emitted if and when we - /// realize that the function will be codegen'ed. - /// - /// Example usage: - /// - /// Diagnose __float128 type usage only from SYCL device code if the current - /// target doesn't support it - /// if (!S.Context.getTargetInfo().hasFloat128Type() && - /// S.getLangOpts().SYCLIsDevice) - /// SYCLDiagIfDeviceCode(Loc, diag::err_type_unsupported) << "__float128"; - SemaDiagnosticBuilder SYCLDiagIfDeviceCode(SourceLocation Loc, - unsigned DiagID); - - void deepTypeCheckForSYCLDevice(SourceLocation UsedAt, - llvm::DenseSet Visited, - ValueDecl *DeclToCheck); - - ///@} }; DeductionFailureInfo diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h new file mode 100644 index 0000000000000..f0dcb92ee9ab3 --- /dev/null +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -0,0 +1,65 @@ +//===----- SemaSYCL.h ------- Semantic Analysis for SYCL constructs -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares semantic analysis for SYCL constructs. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMASYCL_H +#define LLVM_CLANG_SEMA_SEMASYCL_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Sema/Ownership.h" +#include "clang/Sema/SemaBase.h" +#include "llvm/ADT/DenseSet.h" + +namespace clang { + +class SemaSYCL : public SemaBase { +public: + SemaSYCL(Sema &S); + + /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current + /// context is "used as device code". + /// + /// - If CurLexicalContext is a kernel function or it is known that the + /// function will be emitted for the device, emits the diagnostics + /// immediately. + /// - If CurLexicalContext is a function and we are compiling + /// for the device, but we don't know yet that this function will be + /// codegen'ed for the devive, creates a diagnostic which is emitted if and + /// when we realize that the function will be codegen'ed. 
+ /// + /// Example usage: + /// + /// Diagnose __float128 type usage only from SYCL device code if the current + /// target doesn't support it + /// if (!S.Context.getTargetInfo().hasFloat128Type() && + /// S.getLangOpts().SYCLIsDevice) + /// DiagIfDeviceCode(Loc, diag::err_type_unsupported) << "__float128"; + SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID); + + void deepTypeCheckForDevice(SourceLocation UsedAt, + llvm::DenseSet Visited, + ValueDecl *DeclToCheck); + + ExprResult BuildUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + TypeSourceInfo *TSI); + ExprResult ActOnUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + ParsedType ParsedTy); +}; + +} // namespace clang + +#endif // LLVM_CLANG_SEMA_SEMASYCL_H diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index ae23cb432c439..d08e675604d19 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -30,6 +30,7 @@ #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/SmallVector.h" #include @@ -2490,8 +2491,8 @@ ExprResult Parser::ParseSYCLUniqueStableNameExpression() { if (T.consumeClose()) return ExprError(); - return Actions.ActOnSYCLUniqueStableNameExpr(OpLoc, T.getOpenLocation(), - T.getCloseLocation(), Ty.get()); + return Actions.SYCL().ActOnUniqueStableNameExpr( + OpLoc, T.getOpenLocation(), T.getCloseLocation(), Ty.get()); } /// Parse a sizeof or alignof expression. diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 04eadb5f3b8ae..801b03a63dbc8 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -45,6 +45,7 @@ #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/TemplateInstCallback.h" #include "clang/Sema/TypoCorrection.h" @@ -201,6 +202,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, CurScope(nullptr), Ident_super(nullptr), HLSLPtr(std::make_unique(*this)), OpenACCPtr(std::make_unique(*this)), + SYCLPtr(std::make_unique(*this)), MSPointerToMemberRepresentationMethod( LangOpts.getMSPointerToMemberRepresentationMethod()), MSStructPragmaOn(false), VtorDispStack(LangOpts.getVtorDispMode()), @@ -1903,7 +1905,7 @@ Sema::targetDiag(SourceLocation Loc, unsigned DiagID, const FunctionDecl *FD) { : CUDADiagIfHostCode(Loc, DiagID); if (getLangOpts().SYCLIsDevice) - return SYCLDiagIfDeviceCode(Loc, DiagID); + return SYCL().DiagIfDeviceCode(Loc, DiagID); return SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, DiagID, FD, *this); @@ -1919,7 +1921,7 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { // constant byte size like zero length arrays. So, do a deep check for SYCL. 
if (D && LangOpts.SYCLIsDevice) { llvm::DenseSet Visited; - deepTypeCheckForSYCLDevice(Loc, Visited, D); + SYCL().deepTypeCheckForDevice(Loc, Visited, D); } Decl *C = cast(getCurLexicalContext()); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 594c11788f4e7..4d4ef9b16381b 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3794,28 +3794,6 @@ ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc, SL); } -ExprResult Sema::BuildSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - TypeSourceInfo *TSI) { - return SYCLUniqueStableNameExpr::Create(Context, OpLoc, LParen, RParen, TSI); -} - -ExprResult Sema::ActOnSYCLUniqueStableNameExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, - ParsedType ParsedTy) { - TypeSourceInfo *TSI = nullptr; - QualType Ty = GetTypeFromParser(ParsedTy, &TSI); - - if (Ty.isNull()) - return ExprError(); - if (!TSI) - TSI = Context.getTrivialTypeSourceInfo(Ty, LParen); - - return BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); -} - ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) { return BuildPredefinedExpr(Loc, getPredefinedExprKind(Kind)); } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 18ebaa13346a4..18f6d8f030473 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -8,6 +8,7 @@ // This implements Semantic Analysis for SYCL constructs. //===----------------------------------------------------------------------===// +#include "clang/Sema/SemaSYCL.h" #include "clang/AST/Mangle.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -18,28 +19,30 @@ using namespace clang; // SYCL device specific diagnostics implementation // ----------------------------------------------------------------------------- -Sema::SemaDiagnosticBuilder Sema::SYCLDiagIfDeviceCode(SourceLocation Loc, +SemaSYCL::SemaSYCL(Sema &S) : SemaBase(S) {} + +Sema::SemaDiagnosticBuilder SemaSYCL::DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID) { assert(getLangOpts().SYCLIsDevice && "Should only be called during SYCL compilation"); - FunctionDecl *FD = dyn_cast(getCurLexicalContext()); + FunctionDecl *FD = dyn_cast(SemaRef.getCurLexicalContext()); SemaDiagnosticBuilder::Kind DiagKind = [this, FD] { if (!FD) return SemaDiagnosticBuilder::K_Nop; - if (getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted) + if (SemaRef.getEmissionStatus(FD) == Sema::FunctionEmissionStatus::Emitted) return SemaDiagnosticBuilder::K_ImmediateWithCallStack; return SemaDiagnosticBuilder::K_Deferred; }(); - return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, *this); + return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, FD, SemaRef); } -static bool isZeroSizedArray(Sema &SemaRef, QualType Ty) { - if (const auto *CAT = SemaRef.getASTContext().getAsConstantArrayType(Ty)) +static bool isZeroSizedArray(SemaSYCL &S, QualType Ty) { + if (const auto *CAT = S.getASTContext().getAsConstantArrayType(Ty)) return CAT->isZeroSize(); return false; } -void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, +void SemaSYCL::deepTypeCheckForDevice(SourceLocation UsedAt, llvm::DenseSet Visited, ValueDecl *DeclToCheck) { assert(getLangOpts().SYCLIsDevice && @@ -51,18 +54,18 @@ void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, auto Check = [&](QualType TypeToCheck, const ValueDecl *D) { bool ErrorFound = false; if (isZeroSizedArray(*this, TypeToCheck)) { - 
SYCLDiagIfDeviceCode(UsedAt, diag::err_typecheck_zero_array_size) << 1; + DiagIfDeviceCode(UsedAt, diag::err_typecheck_zero_array_size) << 1; ErrorFound = true; } // Checks for other types can also be done here. if (ErrorFound) { if (NeedToEmitNotes) { if (auto *FD = dyn_cast(D)) - SYCLDiagIfDeviceCode(FD->getLocation(), - diag::note_illegal_field_declared_here) + DiagIfDeviceCode(FD->getLocation(), + diag::note_illegal_field_declared_here) << FD->getType()->isPointerType() << FD->getType(); else - SYCLDiagIfDeviceCode(D->getLocation(), diag::note_declared_at); + DiagIfDeviceCode(D->getLocation(), diag::note_declared_at); } } @@ -93,8 +96,8 @@ void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, auto EmitHistory = [&]() { // The first element is always nullptr. for (uint64_t Index = 1; Index < History.size(); ++Index) { - SYCLDiagIfDeviceCode(History[Index]->getLocation(), - diag::note_within_field_of_type) + DiagIfDeviceCode(History[Index]->getLocation(), + diag::note_within_field_of_type) << History[Index]->getType(); } }; @@ -130,3 +133,26 @@ void Sema::deepTypeCheckForSYCLDevice(SourceLocation UsedAt, } } while (!StackForRecursion.empty()); } + +ExprResult SemaSYCL::BuildUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + TypeSourceInfo *TSI) { + return SYCLUniqueStableNameExpr::Create(getASTContext(), OpLoc, LParen, + RParen, TSI); +} + +ExprResult SemaSYCL::ActOnUniqueStableNameExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, + ParsedType ParsedTy) { + TypeSourceInfo *TSI = nullptr; + QualType Ty = SemaRef.GetTypeFromParser(ParsedTy, &TSI); + + if (Ty.isNull()) + return ExprError(); + if (!TSI) + TSI = getASTContext().getTrivialTypeSourceInfo(Ty, LParen); + + return BuildUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); +} diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index d4d2fa61d65ea..79d60588ae536 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -40,6 +40,7 @@ #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/Sema/SemaSYCL.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/ErrorHandling.h" #include @@ -2632,7 +2633,8 @@ class TreeTransform { SourceLocation LParen, SourceLocation RParen, TypeSourceInfo *TSI) { - return getSema().BuildSYCLUniqueStableNameExpr(OpLoc, LParen, RParen, TSI); + return getSema().SYCL().BuildUniqueStableNameExpr(OpLoc, LParen, RParen, + TSI); } /// Build a new predefined expression.
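As a closing illustration of the `SemaSYCL` split in patch 12, here is a minimal sketch (not part of the patch) of how clang-internal code now reaches the relocated helpers through the new facet; the wrapper function below is hypothetical:

```cpp
// Sketch only: assumes it is compiled inside the clang tree, where a
// clang::Sema instance is available; this is not a public API example.
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaSYCL.h"

// Hypothetical helper. Before this patch the call would have been
// S.SYCLDiagIfDeviceCode(Loc, DiagID) directly on Sema.
static void emitSYCLDeviceDiag(clang::Sema &S, clang::SourceLocation Loc,
                               unsigned DiagID) {
  // The SYCL facet now owns the deferred device-diagnostic logic.
  S.SYCL().DiagIfDeviceCode(Loc, DiagID);
}
```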