diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 15b964e11f711e..15001b842f6e84 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -343,6 +343,7 @@ Bug Fixes to C++ Support
 - Fixed an assertion failure when converting vectors to int/float with invalid
   expressions. (#GH105486)
 - Template parameter names are considered in the name lookup of out-of-line class
   template specialization right before its declaration context. (#GH64082)
+- Fixed a constraint comparison bug for friend declarations. (#GH78101)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index bce7605b95ba43..8b976bdac6dc51 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -39,7 +39,7 @@ namespace clang {
       DIAG_SIZE_AST = 300,
       DIAG_SIZE_COMMENT = 100,
       DIAG_SIZE_CROSSTU = 100,
-      DIAG_SIZE_SEMA = 4500,
+      DIAG_SIZE_SEMA = 5000,
       DIAG_SIZE_ANALYSIS = 100,
       DIAG_SIZE_REFACTORING = 1000,
       DIAG_SIZE_INSTALLAPI = 100,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ea898ba88d8887..68eb34b12c1a7d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5606,7 +5606,7 @@ def note_checking_constraints_for_function_here : Note<
 def note_constraint_substitution_here : Note<
     "while substituting template arguments into constraint expression here">;
 def note_constraint_normalization_here : Note<
-    "while calculating associated constraint of template '%0' here">;
+    "while calculating associated constraint of template %0 here">;
 def note_parameter_mapping_substitution_here : Note<
     "while substituting into concept arguments here; substitution failures not "
     "allowed in concept arguments">;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b5dfd4dd32b63c..3dc13c14c00343 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12720,8 +12720,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   }
 
   case Builtin::BI__noop:
-    // __noop always evaluates successfully
-    return true;
+    // __noop always evaluates successfully and returns 0.
+    return Success(0, E);
 
   case Builtin::BI__builtin_is_constant_evaluated: {
     const auto *Callee = Info.CurrentCall->getCallee();
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 6b2ba22504129d..3d054e64d1ff6a 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -851,8 +851,8 @@ std::optional<std::string> ToolChain::getRuntimePath() const {
   llvm::sys::path::append(P, "lib");
   if (auto Ret = getTargetSubDirPath(P))
     return Ret;
-  // Darwin does not use per-target runtime directory.
-  if (Triple.isOSDarwin())
+  // Darwin and AIX do not use a per-target runtime directory.
+  if (Triple.isOSDarwin() || Triple.isOSAIX())
     return {};
   llvm::sys::path::append(P, Triple.str());
   return std::string(P);
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 246b29d308bfaf..0d42a6c2bfb5c6 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -609,9 +609,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
           ProbablyBracedList = NextTok->isNot(tok::l_square);
         }
 
-        // Cpp macro definition body that is a nonempty braced list or block:
+        // Cpp macro definition body containing a nonempty braced list or block:
         if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
-            !FormatTok->Previous &&
             NextTok->is(tok::eof) &&
             // A statement can end with only `;` (simple statement), a block
             // closing brace (compound statement), or `:` (label statement).
             // If PrevTok is a block opening brace, Tok ends an empty block.
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 86d6f308a51cc2..6a1b32598bb4a6 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -1012,7 +1012,14 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
   // possible that e.g. constraints involving C<Class<T>> and C<Class> are
   // perceived identical.
   std::optional<Sema::ContextRAII> ContextScope;
-  if (auto *RD = dyn_cast<CXXRecordDecl>(DeclInfo.getDeclContext())) {
+  const DeclContext *DC = [&] {
+    if (!DeclInfo.getDecl())
+      return DeclInfo.getDeclContext();
+    return DeclInfo.getDecl()->getFriendObjectKind()
+               ? DeclInfo.getLexicalDeclContext()
+               : DeclInfo.getDeclContext();
+  }();
+  if (auto *RD = dyn_cast<CXXRecordDecl>(DC)) {
     ThisScope.emplace(S, const_cast<CXXRecordDecl *>(RD), Qualifiers());
     ContextScope.emplace(S, const_cast<DeclContext *>(cast<DeclContext>(RD)),
                          /*NewThisContext=*/false);
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 776297479e141e..c42cc250bb904a 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1226,8 +1226,7 @@ void Sema::PrintInstantiationStack() {
   case CodeSynthesisContext::ConstraintNormalization:
     Diags.Report(Active->PointOfInstantiation,
                  diag::note_constraint_normalization_here)
-        << cast<NamedDecl>(Active->Entity)->getName()
-        << Active->InstantiationRange;
+        << cast<NamedDecl>(Active->Entity) << Active->InstantiationRange;
     break;
   case CodeSynthesisContext::ParameterMappingSubstitution:
     Diags.Report(Active->PointOfInstantiation,
diff --git a/clang/test/CXX/temp/temp.constr/temp.constr.normal/p1.cpp b/clang/test/CXX/temp/temp.constr/temp.constr.normal/p1.cpp
index d80710937cdfa1..3992835c444027 100644
--- a/clang/test/CXX/temp/temp.constr/temp.constr.normal/p1.cpp
+++ b/clang/test/CXX/temp/temp.constr/temp.constr.normal/p1.cpp
@@ -15,7 +15,7 @@ template<typename T> requires Bar2<T> struct S2 { };
 // expected-note@-1{{template is declared here}}
 template<typename T> requires Bar2<T> && true struct S2<T> { };
 // expected-error@-1{{class template partial specialization is not more specialized than the primary template}}
-// expected-note@-2{{while calculating associated constraint of template 'S2' here}}
+// expected-note@-2{{while calculating associated constraint of template 'S2' here}}
 
 namespace type_pack {
 template
diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index 3ed8b6f0c71861..ab36b6e7720ba3 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -639,7 +639,7 @@ void test6(struct anon_struct *p, int index) {
   p->array[index] =
__builtin_dynamic_object_size(p->array, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test6_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -3) i64 @test6_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -649,7 +649,7 @@ void test6(struct anon_struct *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], i64 0, i64 [[TMP0]] // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP1]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test6_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -3) i64 @test6_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -955,7 +955,7 @@ void test10(struct union_of_fams *p, int index) { p->bytes[index] = (unsigned char)__builtin_dynamic_object_size(p->bytes, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -2147483648, 2147483648) i64 @test10_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -964,7 +964,7 @@ void test10(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NARROW]] to i64 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP0]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -2147483648, 2147483648) i64 @test10_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 diff --git a/clang/test/Driver/aix-print-runtime-dir.c b/clang/test/Driver/aix-print-runtime-dir.c new file mode 100644 index 00000000000000..0d68ad6fee005e --- /dev/null +++ b/clang/test/Driver/aix-print-runtime-dir.c @@ -0,0 +1,11 @@ +// Test output of -print-runtime-dir on AIX + +// RUN: %clang -print-runtime-dir --target=powerpc-ibm-aix \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: | FileCheck --check-prefix=PRINT-RUNTIME-DIR %s + +// RUN: %clang -print-runtime-dir --target=powerpc64-ibm-aix \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: | FileCheck --check-prefix=PRINT-RUNTIME-DIR %s + +// PRINT-RUNTIME-DIR: lib{{/|\\}}aix{{$}} diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp index c6fbb8b514d671..f47ed3a1f7ebfc 100644 --- a/clang/test/SemaCXX/builtins.cpp +++ b/clang/test/SemaCXX/builtins.cpp @@ -177,5 +177,21 @@ static void __builtin_cpu_init(); // expected-error {{static declaration of '__b #endif #ifdef _MSC_VER -constexpr int x = []{ __noop; return 0; }(); // expected-no-diagnostics +constexpr int x = [] { + __noop; + return 0; +}(); // expected-no-diagnostics +static_assert([] { return __noop; }() == 0); +static_assert([] { return __noop(4); }() == 0); +extern int not_accessed; +void not_called(); +static_assert([] { return 
__noop(not_accessed *= 6); }() == 0);
+static_assert([] { return __noop(not_called()); }() == 0);
+static_assert([] { return __noop(throw ""); }() == 0);
+static_assert([] { return __noop(throw "", throw ""); }() == 0);
+static_assert([] {
+  int a = 5;
+  __noop(++a);
+  return a;
+}() == 5);
 #endif
diff --git a/clang/test/SemaTemplate/concepts-friends.cpp b/clang/test/SemaTemplate/concepts-friends.cpp
index 14b37d78d951dc..d05be423a8cfcd 100644
--- a/clang/test/SemaTemplate/concepts-friends.cpp
+++ b/clang/test/SemaTemplate/concepts-friends.cpp
@@ -525,3 +525,26 @@ struct S {
 };
 
 }
+
+namespace GH78101 {
+
+template <typename T>
+concept True = true;
+
+template <typename T, int I> struct Template {
+  static constexpr int i = I;
+
+  friend constexpr auto operator+(True auto f) { return i; }
+};
+
+template <int I> struct Template<float, I> {
+  static constexpr int i = I;
+
+  friend constexpr auto operator+(True auto f) { return i; }
+};
+
+Template<float, 5> f{};
+
+static_assert(+Template<float, 5>{} == 5);
+
+} // namespace GH78101
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 497b911f4efbba..5d37a65250d0b1 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3256,6 +3256,11 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_BRACE_KIND(Tokens[10], BK_Block);
   EXPECT_TOKEN(Tokens[11], tok::r_brace, TT_StructRBrace);
   EXPECT_BRACE_KIND(Tokens[11], BK_Block);
+
+  Tokens = annotate("#define MEMBER(NAME) NAME{\"\"}");
+  ASSERT_EQ(Tokens.size(), 11u) << Tokens;
+  EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit);
+  EXPECT_BRACE_KIND(Tokens[9], BK_BracedInit);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandsElaboratedTypeSpecifier) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp
index f1cc0b5e1e8ac0..88536fc4e6222f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp
@@ -30,7 +30,7 @@ namespace __sanitizer {
 bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   Dl_info info;
   int result = dladdr((const void *)addr, &info);
-  if (!result) return false;
+  if (!result || !info.dli_sname) return false;
 
   // Compute offset if possible. `dladdr()` doesn't always ensure that `addr >=
   // sym_addr` so only compute the offset when this holds. Failure to find the
@@ -51,7 +51,7 @@ bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
 bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *datainfo) {
   Dl_info info;
   int result = dladdr((const void *)addr, &info);
-  if (!result) return false;
+  if (!result || !info.dli_sname) return false;
   const char *demangled = DemangleSwiftAndCXX(info.dli_sname);
   if (!demangled) demangled = info.dli_sname;
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
index 845ba385918d0d..a103861f1510b8 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
@@ -341,7 +341,18 @@ constexpr TypeBuilderFunc getModel() {
 template <>
 constexpr TypeBuilderFunc getModel<long double>() {
   return [](mlir::MLIRContext *context) -> mlir::Type {
-    return mlir::FloatType::getF80(context);
+    // See TODO at the top of the file. This is configuring for the host system
+    // - it might be incorrect when cross-compiling!
+ constexpr size_t size = sizeof(long double); + static_assert(size == 16 || size == 10 || size == 8, + "unsupported long double size"); + if constexpr (size == 16) + return mlir::FloatType::getF128(context); + if constexpr (size == 10) + return mlir::FloatType::getF80(context); + if constexpr (size == 8) + return mlir::FloatType::getF64(context); + llvm_unreachable("failed static assert"); }; } template <> diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 54f2a12d800085..029d3776bcc0b8 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -146,8 +146,8 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertBoxedSequenceType( elements.push_back(subrangeTy); } return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_array_type, /*recursive_id=*/{}, - /*name=*/nullptr, /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, + context, llvm::dwarf::DW_TAG_array_type, /*name=*/nullptr, + /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, mlir::LLVM::DIFlags::Zero, /*sizeInBits=*/0, /*alignInBits=*/0, elements, dataLocation, /*rank=*/nullptr, allocated, associated); } @@ -188,7 +188,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( } return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_structure_type, /*recursive_id=*/{}, + context, llvm::dwarf::DW_TAG_structure_type, mlir::StringAttr::get(context, result.second.name), fileAttr, line, scope, /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero, offset * 8, /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr, @@ -236,8 +236,8 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( // have been set to some valid default values. return mlir::LLVM::DICompositeTypeAttr::get( - context, llvm::dwarf::DW_TAG_array_type, /*recursive_id=*/{}, - /*name=*/nullptr, /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, + context, llvm::dwarf::DW_TAG_array_type, /*name=*/nullptr, + /*file=*/nullptr, /*line=*/0, /*scope=*/nullptr, elemTy, mlir::LLVM::DIFlags::Zero, /*sizeInBits=*/0, /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr, /*allocated=*/nullptr, /*associated=*/nullptr); diff --git a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order.pass.cpp b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order.pass.cpp index ac6b6879f77309..e384ea289bb5bf 100644 --- a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order.pass.cpp +++ b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order.pass.cpp @@ -454,12 +454,16 @@ int main(int, char**) test_1_2(); test_1_3(); test_1_3(); - // test_1_3(); // UNIMPLEMENTED +#ifdef TEST_LONG_DOUBLE_IS_DOUBLE + test_1_3(); // UNIMPLEMENTED when long double is a distinct type +#endif test_1_4(); static_assert(test_1_3()); static_assert(test_1_3()); - // static_assert(test_1_3()); // UNIMPLEMENTED +#ifdef TEST_LONG_DOUBLE_IS_DOUBLE + static_assert(test_1_3()); // UNIMPLEMENTED when long double is a distinct type +#endif static_assert(test_1_4()); return 0; diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index 420307e0dbcf02..03d9f927997476 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -77,10 +77,13 @@ FLAGS_ENUM(TypeQueryOptions){ /// If set, the query will ignore all Module entries in the type context, /// even for exact matches. 
e_ignore_modules = (1u << 2), + /// If set, all anonymous namespaces in the context must be matched exactly + /// by the pattern. Otherwise, superfluous namespaces are skipped. + e_strict_namespaces = (1u << 3), /// When true, the find types call should stop the query as soon as a single /// matching type is found. When false, the type query should find all /// matching types. - e_find_one = (1u << 3), + e_find_one = (1u << 4), }; LLDB_MARK_AS_BITMASK_ENUM(TypeQueryOptions) @@ -264,7 +267,22 @@ class TypeQuery { bool GetExactMatch() const { return (m_options & e_exact_match) != 0; } bool GetIgnoreModules() const { return (m_options & e_ignore_modules) != 0; } - void SetIgnoreModules() { m_options &= ~e_ignore_modules; } + void SetIgnoreModules(bool b) { + if (b) + m_options |= e_ignore_modules; + else + m_options &= ~e_ignore_modules; + } + + bool GetStrictNamespaces() const { + return (m_options & e_strict_namespaces) != 0; + } + void SetStrictNamespaces(bool b) { + if (b) + m_options |= e_strict_namespaces; + else + m_options &= ~e_strict_namespaces; + } /// The \a m_context can be used in two ways: normal types searching with /// the context containing a stanadard declaration context for a type, or @@ -279,7 +297,7 @@ class TypeQuery { if (b) m_options |= e_find_one; else - m_options &= (e_exact_match | e_find_one); + m_options &= ~e_find_one; } /// Access the internal compiler context array. diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp index 7af768aad0bc19..4c547afe30fe81 100644 --- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp @@ -90,6 +90,7 @@ TypeAndOrName ItaniumABILanguageRuntime::GetTypeInfo( TypeResults results; TypeQuery query(const_lookup_name.GetStringRef(), TypeQueryOptions::e_exact_match | + TypeQueryOptions::e_strict_namespaces | TypeQueryOptions::e_find_one); if (module_sp) { module_sp->FindTypes(query, results); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp index fb32e2adeb3fea..0a13c457a307ae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp @@ -440,12 +440,6 @@ static void GetTypeLookupContextImpl(DWARFDIE die, continue; } - // If there is no name, then there is no need to look anything up for this - // DIE. - const char *name = die.GetName(); - if (!name || !name[0]) - return; - // Add this DIE's contribution at the end of the chain. auto push_ctx = [&](CompilerContextKind kind, llvm::StringRef name) { context.push_back({kind, ConstString(name)}); @@ -471,7 +465,7 @@ static void GetTypeLookupContextImpl(DWARFDIE die, push_ctx(CompilerContextKind::Typedef, die.GetName()); break; case DW_TAG_base_type: - push_ctx(CompilerContextKind::Builtin, name); + push_ctx(CompilerContextKind::Builtin, die.GetName()); break; // If any of the tags below appear in the parent chain, stop the decl // context and return. Prior to these being in here, if a type existed in a diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index eb321407e3734c..f7b44ade0da165 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -134,6 +134,20 @@ bool TypeQuery::ContextMatches( if (ctx == ctx_end) return false; // Pattern too long. 
+ if (ctx->kind == CompilerContextKind::Namespace && ctx->name.IsEmpty()) { + // We're matching an anonymous namespace. These are optional, so we check + // if the pattern expects an anonymous namespace. + if (pat->name.IsEmpty() && (pat->kind & CompilerContextKind::Namespace) == + CompilerContextKind::Namespace) { + // Match, advance both iterators. + ++pat; + } + // Otherwise, only advance the context to skip over the anonymous + // namespace, and try matching again. + ++ctx; + continue; + } + // See if there is a kind mismatch; they should have 1 bit in common. if ((ctx->kind & pat->kind) == CompilerContextKind()) return false; @@ -145,10 +159,16 @@ bool TypeQuery::ContextMatches( ++pat; } - // Skip over any remaining module entries if we were asked to do that. - while (GetIgnoreModules() && ctx != ctx_end && - ctx->kind == CompilerContextKind::Module) - ++ctx; + // Skip over any remaining module and anonymous namespace entries if we were + // asked to do that. + auto should_skip = [this](const CompilerContext &ctx) { + if (ctx.kind == CompilerContextKind::Module) + return GetIgnoreModules(); + if (ctx.kind == CompilerContextKind::Namespace && ctx.name.IsEmpty()) + return !GetStrictNamespaces(); + return false; + }; + ctx = std::find_if_not(ctx, ctx_end, should_skip); // At this point, we have exhausted the pattern and we have a partial match at // least. If that's all we're looking for, we're done. @@ -788,7 +808,13 @@ Type::GetTypeScopeAndBasename(llvm::StringRef name) { switch (pos.value()) { case ':': if (prev_is_colon && template_depth == 0) { - result.scope.push_back(name.slice(name_begin, pos.index() - 1)); + llvm::StringRef scope_name = name.slice(name_begin, pos.index() - 1); + // The itanium demangler uses this string to represent anonymous + // namespaces. Convert it to a more language-agnostic form (which is + // also used in DWARF). + if (scope_name == "(anonymous namespace)") + scope_name = ""; + result.scope.push_back(scope_name); name_begin = pos.index() + 1; } break; diff --git a/lldb/test/API/lang/cpp/dynamic-value/Makefile b/lldb/test/API/lang/cpp/dynamic-value/Makefile index 2bba8e757f79b7..ce91dc63f473f5 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/Makefile +++ b/lldb/test/API/lang/cpp/dynamic-value/Makefile @@ -1,3 +1,3 @@ -CXX_SOURCES := pass-to-base.cpp +CXX_SOURCES := pass-to-base.cpp anonymous-b.cpp include Makefile.rules diff --git a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py index 60a2590e1559d3..e016168f047c19 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py +++ b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py @@ -170,7 +170,7 @@ def test_get_dynamic_vals(self): self.assertTrue(reallyA_value) reallyA_loc = int(reallyA_value.GetLocation(), 16) - # Finally continue to doSomething again, and make sure we get the right value for anotherA, + # Continue to doSomething again, and make sure we get the right value for anotherA, # which this time around is just an "A". threads = lldbutil.continue_to_breakpoint(process, do_something_bpt) @@ -184,6 +184,19 @@ def test_get_dynamic_vals(self): self.assertEqual(anotherA_loc, reallyA_loc) self.assertEqual(anotherA_value.GetTypeName().find("B"), -1) + # Finally do the same with a B in an anonymous namespace. 
+ threads = lldbutil.continue_to_breakpoint(process, do_something_bpt) + self.assertEqual(len(threads), 1) + thread = threads[0] + + frame = thread.GetFrameAtIndex(0) + anotherA_value = frame.FindVariable("anotherA", use_dynamic) + self.assertTrue(anotherA_value) + self.assertIn("B", anotherA_value.GetTypeName()) + anon_b_value = anotherA_value.GetChildMemberWithName("m_anon_b_value") + self.assertTrue(anon_b_value) + self.assertEqual(anon_b_value.GetValueAsSigned(), 47) + def examine_value_object_of_this_ptr( self, this_static, this_dynamic, dynamic_location ): diff --git a/lldb/test/API/lang/cpp/dynamic-value/a.h b/lldb/test/API/lang/cpp/dynamic-value/a.h new file mode 100644 index 00000000000000..708cbb79fee5cd --- /dev/null +++ b/lldb/test/API/lang/cpp/dynamic-value/a.h @@ -0,0 +1,25 @@ +#ifndef A_H +#define A_H + +#include +#include + +class A { +public: + A(int value) : m_a_value(value) {} + A(int value, A *client_A) : m_a_value(value), m_client_A(client_A) {} + + virtual ~A() {} + + virtual void doSomething(A &anotherA); + + int Value() { return m_a_value; } + +private: + int m_a_value; + std::auto_ptr m_client_A; +}; + +A *make_anonymous_B(); + +#endif diff --git a/lldb/test/API/lang/cpp/dynamic-value/anonymous-b.cpp b/lldb/test/API/lang/cpp/dynamic-value/anonymous-b.cpp new file mode 100644 index 00000000000000..755afcbf12a988 --- /dev/null +++ b/lldb/test/API/lang/cpp/dynamic-value/anonymous-b.cpp @@ -0,0 +1,13 @@ +#include "a.h" + +namespace { +class B : public A { +public: + B() : A(42) {} + +private: + int m_anon_b_value = 47; +}; +} // namespace + +A *make_anonymous_B() { return new B(); } diff --git a/lldb/test/API/lang/cpp/dynamic-value/pass-to-base.cpp b/lldb/test/API/lang/cpp/dynamic-value/pass-to-base.cpp index 2bccf3303823c1..be763390cc6f90 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/pass-to-base.cpp +++ b/lldb/test/API/lang/cpp/dynamic-value/pass-to-base.cpp @@ -1,5 +1,10 @@ -#include -#include +#include "a.h" + +void A::doSomething(A &anotherA) { + printf("In A %p doing something with %d.\n", this, m_a_value); + int tmp_value = anotherA.Value(); + printf("Also have another A at %p: %d.\n", &anotherA, tmp_value); // Break here in doSomething. +} class Extra { @@ -11,33 +16,6 @@ class Extra int m_extra_two; }; -class A -{ -public: - A(int value) : m_a_value (value) {} - A(int value, A* client_A) : m_a_value (value), m_client_A (client_A) {} - - virtual ~A() {} - - virtual void - doSomething (A &anotherA) - { - printf ("In A %p doing something with %d.\n", this, m_a_value); - int tmp_value = anotherA.Value(); - printf ("Also have another A at %p: %d.\n", &anotherA, tmp_value); // Break here in doSomething. - } - - int - Value() - { - return m_a_value; - } - -private: - int m_a_value; - std::auto_ptr m_client_A; -}; - class B : public Extra, public virtual A { public: @@ -65,5 +43,7 @@ main (int argc, char **argv) A reallyA (500); myB.doSomething (reallyA); // Break here and get real address of reallyA. + myB.doSomething(*make_anonymous_B()); + return 0; } diff --git a/lldb/test/API/lang/cpp/namespace/TestNamespace.py b/lldb/test/API/lang/cpp/namespace/TestNamespace.py index 84891b322180c3..8b013d928f9ca5 100644 --- a/lldb/test/API/lang/cpp/namespace/TestNamespace.py +++ b/lldb/test/API/lang/cpp/namespace/TestNamespace.py @@ -208,6 +208,12 @@ def test_with_run_command(self): patterns=[" = 3"], ) + # Search for a type in an anonymous namespace, both with and without the + # namespace prefix. 
+ self.expect("type lookup -- my_uint_t", substrs=["unsigned int"]) + self.expect("type lookup -- (anonymous namespace)::my_uint_t", + substrs=["unsigned int"]) + # rdar://problem/8660275 # test/namespace: 'expression -- i+j' not working # This has been fixed. diff --git a/lldb/unittests/Symbol/TestType.cpp b/lldb/unittests/Symbol/TestType.cpp index e4b56b9ff02f7c..e3bb2cf6e69e2a 100644 --- a/lldb/unittests/Symbol/TestType.cpp +++ b/lldb/unittests/Symbol/TestType.cpp @@ -16,6 +16,7 @@ using namespace lldb; using namespace lldb_private; +using testing::ElementsAre; using testing::Not; TEST(Type, GetTypeScopeAndBasename) { @@ -59,8 +60,33 @@ MATCHER_P(MatchesIgnoringModules, pattern, "") { TypeQuery query(pattern, TypeQueryOptions::e_ignore_modules); return query.ContextMatches(arg); } +MATCHER_P(MatchesWithStrictNamespaces, pattern, "") { + TypeQuery query(pattern, TypeQueryOptions::e_strict_namespaces); + return query.ContextMatches(arg); +} } // namespace +TEST(Type, TypeQueryFlags) { + TypeQuery q("foo", e_none); + auto get = [](const TypeQuery &q) -> std::vector { + return {q.GetFindOne(), q.GetExactMatch(), q.GetModuleSearch(), + q.GetIgnoreModules(), q.GetStrictNamespaces()}; + }; + EXPECT_THAT(get(q), ElementsAre(false, false, false, false, false)); + + q.SetFindOne(true); + EXPECT_THAT(get(q), ElementsAre(true, false, false, false, false)); + + q.SetIgnoreModules(true); + EXPECT_THAT(get(q), ElementsAre(true, false, false, true, false)); + + q.SetStrictNamespaces(true); + EXPECT_THAT(get(q), ElementsAre(true, false, false, true, true)); + + q.SetIgnoreModules(false); + EXPECT_THAT(get(q), ElementsAre(true, false, false, false, true)); +} + TEST(Type, CompilerContextPattern) { auto make_module = [](llvm::StringRef name) { return CompilerContext(CompilerContextKind::Module, ConstString(name)); @@ -103,6 +129,10 @@ TEST(Type, CompilerContextPattern) { (std::vector{make_module("A"), make_module("B"), make_class("C")}), Matches( std::vector{make_module("A"), make_module("B"), make_any_type("C")})); + EXPECT_THAT((std::vector{make_module("A"), make_module("B"), + make_namespace(""), make_class("C")}), + Matches(std::vector{make_module("A"), make_module("B"), + make_any_type("C")})); EXPECT_THAT( (std::vector{make_module("A"), make_module("B"), make_enum("C2")}), Not(Matches(std::vector{make_module("A"), make_module("B"), @@ -111,4 +141,30 @@ TEST(Type, CompilerContextPattern) { Matches(std::vector{make_class("C")})); EXPECT_THAT((std::vector{make_namespace("NS"), make_class("C")}), Not(Matches(std::vector{make_any_type("C")}))); + + EXPECT_THAT((std::vector{make_namespace(""), make_class("C")}), + Matches(std::vector{make_class("C")})); + EXPECT_THAT((std::vector{make_namespace(""), make_class("C")}), + Not(MatchesWithStrictNamespaces(std::vector{make_class("C")}))); + EXPECT_THAT((std::vector{make_namespace(""), make_class("C")}), + Matches(std::vector{make_namespace(""), make_class("C")})); + EXPECT_THAT((std::vector{make_namespace(""), make_class("C")}), + MatchesWithStrictNamespaces( + std::vector{make_namespace(""), make_class("C")})); + EXPECT_THAT((std::vector{make_class("C")}), + Not(Matches(std::vector{make_namespace(""), make_class("C")}))); + EXPECT_THAT((std::vector{make_class("C")}), + Not(MatchesWithStrictNamespaces( + std::vector{make_namespace(""), make_class("C")}))); + EXPECT_THAT((std::vector{make_namespace(""), make_namespace("NS"), + make_namespace(""), make_class("C")}), + Matches(std::vector{make_namespace("NS"), make_class("C")})); + EXPECT_THAT( + 
(std::vector{make_namespace(""), make_namespace(""), make_namespace("NS"), + make_namespace(""), make_namespace(""), make_class("C")}), + Matches(std::vector{make_namespace("NS"), make_class("C")})); + EXPECT_THAT((std::vector{make_module("A"), make_namespace("NS"), + make_namespace(""), make_class("C")}), + MatchesIgnoringModules( + std::vector{make_namespace("NS"), make_class("C")})); } diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp index 122b7de7516b6d..1e4c8f3ba07787 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp @@ -222,6 +222,9 @@ TEST(DWARFDIETest, GetContext) { Attributes: - Attribute: DW_AT_name Form: DW_FORM_string + - Code: 0x4 + Tag: DW_TAG_namespace + Children: DW_CHILDREN_yes debug_info: - Version: 4 AddrSize: 8 @@ -235,6 +238,11 @@ TEST(DWARFDIETest, GetContext) { - AbbrCode: 0x3 Values: - CStr: STRUCT + - AbbrCode: 0x4 + - AbbrCode: 0x3 + Values: + - CStr: STRUCT + - AbbrCode: 0x0 - AbbrCode: 0x0 - AbbrCode: 0x0 )"; @@ -245,15 +253,17 @@ TEST(DWARFDIETest, GetContext) { DWARFUnit *unit = symbol_file->DebugInfo().GetUnitAtIndex(0); ASSERT_TRUE(unit); - auto make_namespace = [](llvm::StringRef name) { + auto make_namespace = [](const char *name) { return CompilerContext(CompilerContextKind::Namespace, ConstString(name)); }; - auto make_struct = [](llvm::StringRef name) { + auto make_struct = [](const char *name) { return CompilerContext(CompilerContextKind::ClassOrStruct, ConstString(name)); }; DWARFDIE struct_die = unit->DIE().GetFirstChild().GetFirstChild(); ASSERT_TRUE(struct_die); + DWARFDIE anon_struct_die = struct_die.GetSibling().GetFirstChild(); + ASSERT_TRUE(anon_struct_die); EXPECT_THAT( struct_die.GetDeclContext(), testing::ElementsAre(make_namespace("NAMESPACE"), make_struct("STRUCT"))); @@ -263,6 +273,18 @@ TEST(DWARFDIETest, GetContext) { EXPECT_THAT(struct_die.GetDWARFDeclContext(), DWARFDeclContext({{DW_TAG_structure_type, "STRUCT"}, {DW_TAG_namespace, "NAMESPACE"}})); + EXPECT_THAT(anon_struct_die.GetDeclContext(), + testing::ElementsAre(make_namespace("NAMESPACE"), + make_namespace(nullptr), + make_struct("STRUCT"))); + EXPECT_THAT(anon_struct_die.GetTypeLookupContext(), + testing::ElementsAre(make_namespace("NAMESPACE"), + make_namespace(nullptr), + make_struct("STRUCT"))); + EXPECT_THAT(anon_struct_die.GetDWARFDeclContext(), + DWARFDeclContext({{DW_TAG_structure_type, "STRUCT"}, + {DW_TAG_namespace, nullptr}, + {DW_TAG_namespace, "NAMESPACE"}})); } TEST(DWARFDIETest, GetContextInFunction) { diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index f76eacb9d51366..3707ca824f6e9c 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -356,6 +356,8 @@ if (NOT PURE_WINDOWS) endif() check_symbol_exists(pthread_getname_np pthread.h HAVE_PTHREAD_GETNAME_NP) check_symbol_exists(pthread_setname_np pthread.h HAVE_PTHREAD_SETNAME_NP) + check_symbol_exists(pthread_get_name_np "pthread.h;pthread_np.h" HAVE_PTHREAD_GET_NAME_NP) + check_symbol_exists(pthread_set_name_np "pthread.h;pthread_np.h" HAVE_PTHREAD_SET_NAME_NP) if (LLVM_PTHREAD_LIB) list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${LLVM_PTHREAD_LIB}) endif() diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 65ba3f15305c78..a42dae8887392d 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -106,11 +106,26 @@ class [[nodiscard]] APInt { /// \param numBits the bit width of the constructed 
APInt
   /// \param val the initial value of the APInt
   /// \param isSigned how to treat signedness of val
-  APInt(unsigned numBits, uint64_t val, bool isSigned = false)
+  /// \param implicitTrunc allow implicit truncation of non-zero/sign bits of
+  ///        val beyond the range of numBits
+  APInt(unsigned numBits, uint64_t val, bool isSigned = false,
+        bool implicitTrunc = true)
       : BitWidth(numBits) {
+    if (!implicitTrunc) {
+      if (BitWidth == 0) {
+        assert(val == 0 && "Value must be zero for 0-bit APInt");
+      } else if (isSigned) {
+        assert(llvm::isIntN(BitWidth, val) &&
+               "Value is not an N-bit signed value");
+      } else {
+        assert(llvm::isUIntN(BitWidth, val) &&
+               "Value is not an N-bit unsigned value");
+      }
+    }
     if (isSingleWord()) {
       U.VAL = val;
-      clearUnusedBits();
+      if (implicitTrunc || isSigned)
+        clearUnusedBits();
     } else {
       initSlowCase(val, isSigned);
     }
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index f39d2d56d61e89..d71ff40144c097 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -125,6 +125,12 @@
 /* Define to 1 if you have the `pthread_setname_np' function. */
 #cmakedefine HAVE_PTHREAD_SETNAME_NP ${HAVE_PTHREAD_SETNAME_NP}
 
+/* Define to 1 if you have the `pthread_get_name_np' function. */
+#cmakedefine HAVE_PTHREAD_GET_NAME_NP ${HAVE_PTHREAD_GET_NAME_NP}
+
+/* Define to 1 if you have the `pthread_set_name_np' function. */
+#cmakedefine HAVE_PTHREAD_SET_NAME_NP ${HAVE_PTHREAD_SET_NAME_NP}
+
 /* Define to 1 if you have the <mach/mach.h> header file. */
 #cmakedefine HAVE_MACH_MACH_H ${HAVE_MACH_MACH_H}
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index f38ec2debb1813..b4f0166239520a 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -28,18 +28,12 @@ namespace llvm {
 static constexpr unsigned InstCombineDefaultMaxIterations = 1;
 
 struct InstCombineOptions {
-  bool UseLoopInfo = false;
   // Verify that a fix point has been reached after MaxIterations.
   bool VerifyFixpoint = false;
   unsigned MaxIterations = InstCombineDefaultMaxIterations;
 
   InstCombineOptions() = default;
 
-  InstCombineOptions &setUseLoopInfo(bool Value) {
-    UseLoopInfo = Value;
-    return *this;
-  }
-
   InstCombineOptions &setVerifyFixpoint(bool Value) {
     VerifyFixpoint = Value;
     return *this;
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index c2ea88a107c32a..f5f16037bef893 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -80,10 +80,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
   ProfileSummaryInfo *PSI;
   DomConditionCache DC;
 
-  // Optional analyses. When non-null, these can both be used to do better
-  // combining and will be updated to reflect any changes.
-  LoopInfo *LI;
-
   ReversePostOrderTraversal<BasicBlock *> &RPOT;
 
   bool MadeIRChange = false;
@@ -94,19 +90,25 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
   /// Order of predecessors to canonicalize phi nodes towards.
   SmallDenseMap<BasicBlock *, SmallVector<BasicBlock *>, 8> PredOrder;
 
+  /// Backedges, used to avoid pushing instructions across backedges in cases
+  /// where this may result in infinite combine loops. For irreducible loops
+  /// this picks an arbitrary backedge.
+ SmallDenseSet, 8> BackEdges; + bool ComputedBackEdges = false; + public: InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, AAResults *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI, + ProfileSummaryInfo *PSI, const DataLayout &DL, ReversePostOrderTraversal &RPOT) : TTI(TTI), Builder(Builder), Worklist(Worklist), MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true, /*CanUseUndef*/ true, &DC), - ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), LI(LI), RPOT(RPOT) {} + ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), RPOT(RPOT) {} virtual ~InstCombiner() = default; @@ -345,7 +347,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { } BlockFrequencyInfo *getBlockFrequencyInfo() const { return BFI; } ProfileSummaryInfo *getProfileSummaryInfo() const { return PSI; } - LoopInfo *getLoopInfo() const { return LI; } // Call target specific combiners std::optional targetInstCombineIntrinsic(IntrinsicInst &II); @@ -359,6 +360,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { std::function SimplifyAndSetOp); + void computeBackEdges(); + bool isBackEdge(const BasicBlock *From, const BasicBlock *To) { + if (!ComputedBackEdges) + computeBackEdges(); + return BackEdges.contains({From, To}); + } + /// Inserts an instruction \p New before instruction \p Old /// /// Also adds the new instruction to the worklist and returns \p New so that diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h index 1f959311295258..61a500b82875fb 100644 --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -137,7 +137,7 @@ class SCCPSolver { const ValueLatticeElement &getLatticeValueFor(Value *V) const; /// getTrackedRetVals - Get the inferred return value map. - const MapVector &getTrackedRetVals(); + const MapVector &getTrackedRetVals() const; /// getTrackedGlobals - Get and return the set of inferred initializers for /// global variables. @@ -190,6 +190,8 @@ class SCCPSolver { bool removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU, BasicBlock *&NewUnreachableBB) const; + void inferReturnAttributes() const; + bool tryToReplaceWithConstant(Value *V); // Helper to check if \p LV is either a constant or a constant diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 72db28929c0c37..a00ed7530ebc4c 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -375,24 +375,28 @@ struct LinearExpression { APInt Scale; APInt Offset; + /// True if all operations in this expression are NUW. + bool IsNUW; /// True if all operations in this expression are NSW. 
bool IsNSW; LinearExpression(const CastedValue &Val, const APInt &Scale, - const APInt &Offset, bool IsNSW) - : Val(Val), Scale(Scale), Offset(Offset), IsNSW(IsNSW) {} + const APInt &Offset, bool IsNUW, bool IsNSW) + : Val(Val), Scale(Scale), Offset(Offset), IsNUW(IsNUW), IsNSW(IsNSW) {} - LinearExpression(const CastedValue &Val) : Val(Val), IsNSW(true) { + LinearExpression(const CastedValue &Val) + : Val(Val), IsNUW(true), IsNSW(true) { unsigned BitWidth = Val.getBitWidth(); Scale = APInt(BitWidth, 1); Offset = APInt(BitWidth, 0); } - LinearExpression mul(const APInt &Other, bool MulIsNSW) const { + LinearExpression mul(const APInt &Other, bool MulIsNUW, bool MulIsNSW) const { // The check for zero offset is necessary, because generally // (X +nsw Y) *nsw Z does not imply (X *nsw Z) +nsw (Y *nsw Z). bool NSW = IsNSW && (Other.isOne() || (MulIsNSW && Offset.isZero())); - return LinearExpression(Val, Scale * Other, Offset * Other, NSW); + bool NUW = IsNUW && (Other.isOne() || MulIsNUW); + return LinearExpression(Val, Scale * Other, Offset * Other, NUW, NSW); } }; } @@ -408,7 +412,7 @@ static LinearExpression GetLinearExpression( if (const ConstantInt *Const = dyn_cast(Val.V)) return LinearExpression(Val, APInt(Val.getBitWidth(), 0), - Val.evaluateWith(Const->getValue()), true); + Val.evaluateWith(Const->getValue()), true, true); if (const BinaryOperator *BOp = dyn_cast(Val.V)) { if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { @@ -444,6 +448,7 @@ static LinearExpression GetLinearExpression( E = GetLinearExpression(Val.withValue(BOp->getOperand(0), false), DL, Depth + 1, AC, DT); E.Offset += RHS; + E.IsNUW &= NUW; E.IsNSW &= NSW; break; } @@ -451,13 +456,14 @@ static LinearExpression GetLinearExpression( E = GetLinearExpression(Val.withValue(BOp->getOperand(0), false), DL, Depth + 1, AC, DT); E.Offset -= RHS; + E.IsNUW = false; // sub nuw x, y is not add nuw x, -y. E.IsNSW &= NSW; break; } case Instruction::Mul: E = GetLinearExpression(Val.withValue(BOp->getOperand(0), false), DL, Depth + 1, AC, DT) - .mul(RHS, NSW); + .mul(RHS, NUW, NSW); break; case Instruction::Shl: // We're trying to linearize an expression of the kind: @@ -472,6 +478,7 @@ static LinearExpression GetLinearExpression( Depth + 1, AC, DT); E.Offset <<= RHS.getLimitedValue(); E.Scale <<= RHS.getLimitedValue(); + E.IsNUW &= NUW; E.IsNSW &= NSW; break; } @@ -697,7 +704,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, // If the integer type is smaller than the index size, it is implicitly // sign extended or truncated to index size. bool NUSW = GEPOp->hasNoUnsignedSignedWrap(); - bool NonNeg = NUSW && GEPOp->hasNoUnsignedWrap(); + bool NUW = GEPOp->hasNoUnsignedWrap(); + bool NonNeg = NUSW && NUW; unsigned Width = Index->getType()->getIntegerBitWidth(); unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0; unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0; @@ -706,9 +714,11 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, // Scale by the type size. unsigned TypeSize = AllocTypeSize.getFixedValue(); - LE = LE.mul(APInt(IndexSize, TypeSize), NUSW); + LE = LE.mul(APInt(IndexSize, TypeSize), NUW, NUSW); Decomposed.Offset += LE.Offset.sext(MaxIndexSize); APInt Scale = LE.Scale.sext(MaxIndexSize); + if (!LE.IsNUW) + Decomposed.NWFlags = Decomposed.NWFlags.withoutNoUnsignedWrap(); // If we already had an occurrence of this index variable, merge this // scale into it. 
For example, we want to handle:
      //   A[x][x] -> x*16 + x*4 -> x*20
      // This also ensures that 'x' only appears in the index list once.
@@ -719,7 +729,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
           areBothVScale(Decomposed.VarIndices[i].Val.V, LE.Val.V)) &&
          Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
        Scale += Decomposed.VarIndices[i].Scale;
-       LE.IsNSW = false; // We cannot guarantee nsw for the merge.
+       // We cannot guarantee no-wrap for the merge.
+       LE.IsNSW = LE.IsNUW = false;
        Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
        break;
      }
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 32ce34114b2f50..d45d3bbefe4fd3 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -103,6 +103,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::fptoui_sat:
   case Intrinsic::lrint:
   case Intrinsic::llrint:
+  case Intrinsic::ucmp:
+  case Intrinsic::scmp:
     return true;
   default:
     return false;
@@ -138,6 +140,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
   case Intrinsic::fptoui_sat:
   case Intrinsic::lrint:
   case Intrinsic::llrint:
+  case Intrinsic::ucmp:
+  case Intrinsic::scmp:
     return OpdIdx == -1 || OpdIdx == 0;
   case Intrinsic::is_fpclass:
     return OpdIdx == 0;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 271a047fc6a7b8..631cc26d6022fe 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2677,7 +2677,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
   }
 
   // From here on out we're working with named functions.
-  if (!CI->getCalledFunction())
+  auto *Callee = CI->getCalledFunction();
+  if (!Callee)
     return false;
 
   // Lower all default uses of _chk calls. This is very similar
@@ -2692,6 +2693,51 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
     return true;
   }
 
+  // SCCP may have propagated, among other things, C++ static variables across
+  // calls. If this happens to be the case, we may want to undo it in order to
+  // avoid redundant pointer computation of the constant, as the function
+  // returning the constant needs to be executed anyway.
+  auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
+    if (!F->getReturnType()->isPointerTy())
+      return nullptr;
+
+    GlobalVariable *UniformValue = nullptr;
+    for (auto &BB : *F) {
+      if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
+        if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
+          if (!UniformValue)
+            UniformValue = V;
+          else if (V != UniformValue)
+            return nullptr;
+        } else {
+          return nullptr;
+        }
+      }
+    }
+
+    return UniformValue;
+  };
+
+  if (Callee->hasExactDefinition()) {
+    if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
+      bool MadeChange = false;
+      for (Use &U : make_early_inc_range(RV->uses())) {
+        auto *I = dyn_cast<Instruction>(U.getUser());
+        if (!I || I->getParent() != CI->getParent()) {
+          // Limit to the same basic block to avoid extending the call-site live
+          // range, which otherwise could increase register pressure.
+ continue; + } + if (CI->comesBefore(I)) { + U.set(CI); + MadeChange = true; + } + } + + return MadeChange; + } + } + return false; } diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 51c50ff872ef21..7c1b90afd495e7 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -198,7 +198,7 @@ bool InitUndef::fixupIllOperand(MachineInstr *MI, MachineOperand &MO) { LLVM_DEBUG( dbgs() << "Emitting PseudoInitUndef Instruction for implicit register " - << MO.getReg() << '\n'); + << printReg(MO.getReg()) << '\n'); const TargetRegisterClass *TargetRegClass = TRI->getLargestSuperClass(MRI->getRegClass(MO.getReg())); diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 2959d3261bea71..2d95ff9e05abe7 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1789,14 +1789,14 @@ void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI, if (isLocationSpill(MI, MF, Reg)) { TKind = TransferKind::TransferSpill; LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump();); - LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) + LLVM_DEBUG(dbgs() << "Register: " << Reg.id() << " " << printReg(Reg, TRI) << "\n"); } else { if (!(Loc = isRestoreInstruction(MI, MF, Reg))) return; TKind = TransferKind::TransferRestore; LLVM_DEBUG(dbgs() << "Recognized as restore: "; MI.dump();); - LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI) + LLVM_DEBUG(dbgs() << "Register: " << Reg.id() << " " << printReg(Reg, TRI) << "\n"); } // Check if the register or spill location is the location of a debug value. diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 48bcc0a61e30c9..822a1beb489592 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1873,12 +1873,10 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { Builder.addImm(regSizeInBits); } - LLVM_DEBUG( - if (SpillOffset != 0) { - dbgs() << "DBG_PHI for Vreg " << Reg << " subreg " << SubReg << - " has nonzero offset\n"; - } - ); + LLVM_DEBUG(if (SpillOffset != 0) { + dbgs() << "DBG_PHI for " << printReg(Reg, TRI, SubReg) + << " has nonzero offset\n"; + }); } // If there was no mapping for a value ID, it's optimized out. Create no // DBG_PHI, and any variables using this value will become optimized out. diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 0bb7953efd52f4..0e9f041f7bfdfe 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -407,7 +407,8 @@ bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) { if (BaseReg.isValid() && lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust, LocalOffset, MI, TRI)) { - LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); + LLVM_DEBUG(dbgs() << " Reusing base register " << printReg(BaseReg) + << "\n"); // We found a register to reuse. 
Offset = FrameSizeAdjust + LocalOffset - BaseOffset; } else { diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 16dab974efacb2..ac9a3d6f0d1a60 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -120,7 +120,7 @@ Printable printReg(Register Reg, const TargetRegisterInfo *TRI, OS << '%' << Register::virtReg2Index(Reg); } } else if (!TRI) - OS << '$' << "physreg" << Reg; + OS << '$' << "physreg" << Reg.id(); else if (Reg < TRI->getNumRegs()) { OS << '$'; printLowerCase(TRI->getName(Reg), OS); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 57783bd86d364e..9fa19f16f9bdc4 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -4330,7 +4330,8 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { "Must have same number of elements"); SmallVector Args(CI->args()); - Value *NewCI = Builder.CreateCall(NewFn, Args); + CallInst *NewCI = Builder.CreateCall(NewFn, Args); + NewCI->setAttributes(CI->getAttributes()); Value *Res = PoisonValue::get(OldST); for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { Value *Elem = Builder.CreateExtractValue(NewCI, Idx); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index bd6ba34a26c8f1..a6ef390a0143c0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -995,9 +995,7 @@ Expected parseInstCombineOptions(StringRef Params) { std::tie(ParamName, Params) = Params.split(';'); bool Enable = !ParamName.consume_front("no-"); - if (ParamName == "use-loop-info") { - Result.setUseLoopInfo(Enable); - } else if (ParamName == "verify-fixpoint") { + if (ParamName == "verify-fixpoint") { Result.setVerifyFixpoint(Enable); } else if (Enable && ParamName.consume_front("max-iterations=")) { APInt MaxIterations; diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index fe22e9ba04b6f5..78d573966c6c99 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -234,7 +234,8 @@ APInt& APInt::operator-=(uint64_t RHS) { APInt APInt::operator*(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - return APInt(BitWidth, U.VAL * RHS.U.VAL); + return APInt(BitWidth, U.VAL * RHS.U.VAL, /*isSigned=*/false, + /*implicitTrunc=*/true); APInt Result(getMemory(getNumWords()), getBitWidth()); tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); @@ -455,7 +456,8 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { "Illegal bit extraction"); if (isSingleWord()) - return APInt(numBits, U.VAL >> bitPosition); + return APInt(numBits, U.VAL >> bitPosition, /*isSigned=*/false, + /*implicitTrunc=*/true); unsigned loBit = whichBit(bitPosition); unsigned loWord = whichWord(bitPosition); @@ -463,7 +465,8 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { // Single word result extracting bits from a single word source. if (loWord == hiWord) - return APInt(numBits, U.pVal[loWord] >> loBit); + return APInt(numBits, U.pVal[loWord] >> loBit, /*isSigned=*/false, + /*implicitTrunc=*/true); // Extracting bits that start on a source word boundary can be done // as a fast memory copy. 
@@ -907,7 +910,8 @@ APInt APInt::trunc(unsigned width) const { assert(width <= BitWidth && "Invalid APInt Truncate request"); if (width <= APINT_BITS_PER_WORD) - return APInt(width, getRawData()[0]); + return APInt(width, getRawData()[0], /*isSigned=*/false, + /*implicitTrunc=*/true); if (width == BitWidth) return *this; @@ -955,7 +959,7 @@ APInt APInt::sext(unsigned Width) const { assert(Width >= BitWidth && "Invalid APInt SignExtend request"); if (Width <= APINT_BITS_PER_WORD) - return APInt(Width, SignExtend64(U.VAL, BitWidth)); + return APInt(Width, SignExtend64(U.VAL, BitWidth), /*isSigned=*/true); if (Width == BitWidth) return *this; diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc index 839c00c5ebbf96..43e18c3a963abf 100644 --- a/llvm/lib/Support/Unix/Threading.inc +++ b/llvm/lib/Support/Unix/Threading.inc @@ -29,7 +29,7 @@ #include -#if defined(__FreeBSD__) || defined(__OpenBSD__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) #include // For pthread_getthreadid_np() / pthread_set_name_np() #endif @@ -121,7 +121,7 @@ uint64_t llvm::get_threadid() { return InitSelf; }(); return Self; -#elif defined(__FreeBSD__) +#elif defined(__FreeBSD__) || defined(__DragonFly__) return uint64_t(pthread_getthreadid_np()); #elif defined(__NetBSD__) return uint64_t(_lwp_self()); @@ -137,20 +137,19 @@ uint64_t llvm::get_threadid() { } static constexpr uint32_t get_max_thread_name_length_impl() { -#if defined(__NetBSD__) +#if defined(PTHREAD_MAX_NAMELEN_NP) return PTHREAD_MAX_NAMELEN_NP; #elif defined(__APPLE__) return 64; -#elif defined(__linux__) -#if HAVE_PTHREAD_SETNAME_NP +#elif defined(__sun__) && defined(__svr4__) + return 31; +#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP return 16; -#else - return 0; -#endif -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ + defined(__DragonFly__) return 16; #elif defined(__OpenBSD__) - return 32; + return 24; #else return 0; #endif @@ -174,19 +173,17 @@ void llvm::set_thread_name(const Twine &Name) { if (get_max_thread_name_length() > 0) NameStr = NameStr.take_back(get_max_thread_name_length() - 1); (void)NameStr; -#if defined(__linux__) -#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__) -#if HAVE_PTHREAD_SETNAME_NP - ::pthread_setname_np(::pthread_self(), NameStr.data()); -#endif -#endif -#elif defined(__FreeBSD__) || defined(__OpenBSD__) +#if defined(HAVE_PTHREAD_SET_NAME_NP) ::pthread_set_name_np(::pthread_self(), NameStr.data()); -#elif defined(__NetBSD__) +#elif defined(HAVE_PTHREAD_SETNAME_NP) +#if defined(__NetBSD__) ::pthread_setname_np(::pthread_self(), "%s", const_cast(NameStr.data())); #elif defined(__APPLE__) ::pthread_setname_np(NameStr.data()); +#else + ::pthread_setname_np(::pthread_self(), NameStr.data()); +#endif #endif } @@ -229,25 +226,24 @@ void llvm::get_thread_name(SmallVectorImpl &Name) { } free(kp); return; -#elif defined(__NetBSD__) +#elif defined(__linux__) && HAVE_PTHREAD_GETNAME_NP + constexpr uint32_t len = get_max_thread_name_length_impl(); + char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive. 
+ if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len)) + Name.append(Buffer, Buffer + strlen(Buffer)); +#elif defined(HAVE_PTHREAD_GET_NAME_NP) constexpr uint32_t len = get_max_thread_name_length_impl(); char buf[len]; - ::pthread_getname_np(::pthread_self(), buf, len); + ::pthread_get_name_np(::pthread_self(), buf, len); Name.append(buf, buf + strlen(buf)); -#elif defined(__OpenBSD__) + +#elif defined(HAVE_PTHREAD_GETNAME_NP) constexpr uint32_t len = get_max_thread_name_length_impl(); char buf[len]; - ::pthread_get_name_np(::pthread_self(), buf, len); + ::pthread_getname_np(::pthread_self(), buf, len); Name.append(buf, buf + strlen(buf)); -#elif defined(__linux__) -#if HAVE_PTHREAD_GETNAME_NP - constexpr uint32_t len = get_max_thread_name_length_impl(); - char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive. - if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len)) - Name.append(Buffer, Buffer + strlen(Buffer)); -#endif #endif } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index fa8aa0ca1c06aa..bcd07c658a092b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -40,7 +40,7 @@ FunctionPass *createSIPeepholeSDWAPass(); FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); FunctionPass *createSIShrinkInstructionsPass(); -FunctionPass *createSILoadStoreOptimizerPass(); +FunctionPass *createSILoadStoreOptimizerLegacyPass(); FunctionPass *createSIWholeQuadModePass(); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIOptimizeExecMaskingPreRAPass(); @@ -190,8 +190,8 @@ extern char &AMDGPUMarkLastScratchLoadID; void initializeSILowerSGPRSpillsPass(PassRegistry &); extern char &SILowerSGPRSpillsID; -void initializeSILoadStoreOptimizerPass(PassRegistry &); -extern char &SILoadStoreOptimizerID; +void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &); +extern char &SILoadStoreOptimizerLegacyID; void initializeSIWholeQuadModePass(PassRegistry &); extern char &SIWholeQuadModeID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 9976a8199d7047..d01e3f0b97ddd1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -99,4 +99,5 @@ MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass()); MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) +MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b9ed6cf5f52f41..20162cce899e95 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -36,6 +36,7 @@ #include "R600TargetMachine.h" #include "SIFixSGPRCopies.h" #include "SIFoldOperands.h" +#include "SILoadStoreOptimizer.h" #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "TargetInfo/AMDGPUTargetInfo.h" @@ -417,7 +418,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIShrinkInstructionsPass(*PR); initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSIOptimizeVGPRLiveRangePass(*PR); - initializeSILoadStoreOptimizerPass(*PR); + initializeSILoadStoreOptimizerLegacyPass(*PR); 
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUSwLowerLDSLegacyPass(*PR); @@ -1306,7 +1307,7 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsLegacyID); if (EnableDPPCombine) addPass(&GCNDPPCombineLegacyID); - addPass(&SILoadStoreOptimizerID); + addPass(&SILoadStoreOptimizerLegacyID); if (isPassEnabled(EnableSDWAPeephole)) { addPass(&SIPeepholeSDWAID); addPass(&EarlyMachineLICMID); diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index ddce80b2ae129e..1b52a48d068ebc 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -57,6 +57,7 @@ // //===----------------------------------------------------------------------===// +#include "SILoadStoreOptimizer.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -104,7 +105,7 @@ struct AddressRegs { // GFX10 image_sample instructions can have 12 vaddrs + srsrc + ssamp. const unsigned MaxAddressRegs = 12 + 1 + 1; -class SILoadStoreOptimizer : public MachineFunctionPass { +class SILoadStoreOptimizer { struct CombineInfo { MachineBasicBlock::iterator I; unsigned EltSize; @@ -295,17 +296,21 @@ class SILoadStoreOptimizer : public MachineFunctionPass { static InstClassEnum getCommonInstClass(const CombineInfo &CI, const CombineInfo &Paired); -public: - static char ID; - - SILoadStoreOptimizer() : MachineFunctionPass(ID) { - initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); - } - bool optimizeInstsWithSameBaseAddr(std::list<CombineInfo> &MergeList, bool &OptimizeListAgain); bool optimizeBlock(std::list<std::list<CombineInfo>> &MergeableInsts); +public: + SILoadStoreOptimizer(AliasAnalysis *AA) : AA(AA) {} + bool run(MachineFunction &MF); +}; + +class SILoadStoreOptimizerLegacy : public MachineFunctionPass { +public: + static char ID; + + SILoadStoreOptimizerLegacy() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "SI Load Store Optimizer"; } @@ -882,18 +887,18 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI, } // end anonymous namespace.
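The restructuring above is the usual recipe for porting a machine pass to the new pass manager: the optimization body becomes a plain class with a run(MachineFunction &) entry point, and both the legacy wrapper and the new-PM pass are thin shims that construct it with the analyses they obtained. A minimal sketch of that shape (illustrative names only, not the LLVM API):

#include <cassert>

// One shared implementation, two pass-manager adapters.
struct Impl {
  explicit Impl(int *AA) : AA(AA) {}    // analyses are injected by the caller
  bool run() { return AA != nullptr; }  // the actual optimization body
  int *AA;
};

struct LegacyShim { // legacy PM: fetches analyses via wrapper passes
  bool runOnMachineFunction(int *AAFromWrapper) {
    return Impl(AAFromWrapper).run();
  }
};

struct NewPMShim { // new PM: analyses come from the analysis manager
  bool run(int *AAFromManager) { return Impl(AAFromManager).run(); }
};

int main() {
  int AA = 0;
  assert(LegacyShim().runOnMachineFunction(&AA) && NewPMShim().run(&AA));
  return 0;
}

Keeping the body free of pass-manager types is what lets the following hunks delete the AA lookup from runOnMachineFunction and hand the result in through the constructor instead.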
-INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SILoadStoreOptimizerLegacy, DEBUG_TYPE, "SI Load Store Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE, "SI Load Store Optimizer", - false, false) +INITIALIZE_PASS_END(SILoadStoreOptimizerLegacy, DEBUG_TYPE, + "SI Load Store Optimizer", false, false) -char SILoadStoreOptimizer::ID = 0; +char SILoadStoreOptimizerLegacy::ID = 0; -char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID; +char &llvm::SILoadStoreOptimizerLegacyID = SILoadStoreOptimizerLegacy::ID; -FunctionPass *llvm::createSILoadStoreOptimizerPass() { - return new SILoadStoreOptimizer(); +FunctionPass *llvm::createSILoadStoreOptimizerLegacyPass() { + return new SILoadStoreOptimizerLegacy(); } static void addDefsUsesToList(const MachineInstr &MI, @@ -2182,8 +2187,9 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm( return false; } - LLVM_DEBUG(dbgs() << " BASE: {" << MAddr.Base.HiReg << ", " - << MAddr.Base.LoReg << "} Offset: " << MAddr.Offset << "\n\n";); + LLVM_DEBUG(dbgs() << " BASE: {" << printReg(MAddr.Base.HiReg, TRI) << ", " + << printReg(MAddr.Base.LoReg, TRI) + << "} Offset: " << MAddr.Offset << "\n\n";); // Step2: Traverse through MI's basic block and find an anchor(that has the // same base-registers) with the highest 13bit distance from MI's offset. @@ -2522,10 +2528,15 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( return Modified; } -bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { +bool SILoadStoreOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; + return SILoadStoreOptimizer( + &getAnalysis<AAResultsWrapperPass>().getAAResults()) + .run(MF); +} +bool SILoadStoreOptimizer::run(MachineFunction &MF) { STM = &MF.getSubtarget<GCNSubtarget>(); if (!STM->loadStoreOptEnabled()) return false; @@ -2534,7 +2545,6 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); LLVM_DEBUG(dbgs() << "Running SILoadStoreOptimizer\n"); @@ -2571,3 +2581,24 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { return Modified; } + +PreservedAnalyses +SILoadStoreOptimizerPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + if (MF.getFunction().hasOptNone()) + return PreservedAnalyses::all(); + + auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF) + .getManager(); + AAResults &AA = FAM.getResult<AAManager>(MF.getFunction()); + + bool Changed = SILoadStoreOptimizer(&AA).run(MF); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.h b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.h new file mode 100644 index 00000000000000..6c20401d6bf5c1 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.h @@ -0,0 +1,30 @@ +//===--- SILoadStoreOptimizer.h -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SILOADSTOREOPTIMIZER_H +#define LLVM_LIB_TARGET_AMDGPU_SILOADSTOREOPTIMIZER_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class SILoadStoreOptimizerPass + : public PassInfoMixin<SILoadStoreOptimizerPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SILOADSTOREOPTIMIZER_H diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index df10613fcc7c93..25dfacca956bb8 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1942,11 +1942,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); - Register TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. - assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); - Register AddrReg = MI.getOperand(2).getReg(); + assert(!MI.getOperand(1).isUndef() && "cannot handle undef"); + Register AddrAndTempReg = MI.getOperand(1).getReg(); + Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0); + Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1); + assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() && + "tied operands have different registers"); Register DesiredReg = MI.getOperand(3).getReg(); MachineOperand New = MI.getOperand(4); New.setIsKill(false); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index ec6367a803506b..9096617a948557 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -10479,33 +10479,42 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, Results.push_back(Cycles32.getValue(1)); } -static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { - SDLoc dl(V.getNode()); - auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32); - bool isBigEndian = DAG.getDataLayout().isBigEndian(); - if (isBigEndian) - std::swap (VLo, VHi); +static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, + SDValue V1) { + SDLoc dl(V0.getNode()); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32); - const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; + const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1}; return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); } +static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) { + SDLoc dl(V.getNode()); + auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + if (isBigEndian) + std::swap(VLo, VHi); + return createGPRPairNode2xi32(DAG, VLo, VHi); +} + static void ReplaceCMP_SWAP_64Results(SDNode *N, - SmallVectorImpl<SDValue> & Results, - SelectionDAG &DAG) { + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) {
assert(N->getValueType(0) == MVT::i64 && "AtomicCmpSwap on types less than 64 should be legal"); - SDValue Ops[] = {N->getOperand(1), - createGPRPairNode(DAG, N->getOperand(2)), - createGPRPairNode(DAG, N->getOperand(3)), - N->getOperand(0)}; + SDValue Ops[] = { + createGPRPairNode2xi32(DAG, N->getOperand(1), + DAG.getUNDEF(MVT::i32)), // pointer, temp + createGPRPairNodei64(DAG, N->getOperand(2)), // expected + createGPRPairNodei64(DAG, N->getOperand(3)), // new + N->getOperand(0), // chain in + }; SDNode *CmpSwap = DAG.getMachineNode( ARM::CMP_SWAP_64, SDLoc(N), - DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops); + DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops); MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 26f7d70b43b262..0fc561382084e3 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -6509,8 +6509,21 @@ def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), - (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), +// The addr_temp and addr_temp_out operands are logically a pair of GPR +// operands: +// * addr is an input, holding the address to swap. +// * temp is an earlyclobber output, used internally in the expansion of the +// pseudo-inst. +// These are combined into one GPRPair operand to ensure that register +// allocation always succeeds. In the worst case there are only 4 GPRPair +// registers available, of which this instruction needs 3 for the other +// operands. If these operands weren't combined they would also use two GPR +// registers, which could overlap with two different GPRPairs, causing +// allocation to fail. With them combined, we need to allocate 4 GPRPairs, +// which will always succeed. +let Constraints = "@earlyclobber $Rd,$addr_temp_out = $addr_temp" in +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out), + (ins GPRPair:$addr_temp, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp index 08e5ccc7bc0be5..6343817a0616d1 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp @@ -94,7 +94,7 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) { uint64_t Msb = 32; uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1); - for (; Msb < 64; ++Msb, HighMask = (HighMask << 1) + 1) { + for (; Msb < 64; ++Msb, HighMask = HighMask << 1) { for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) { uint64_t LowMask = (1ULL << Lsb) - 1; uint64_t Mask = HighMask | LowMask; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 0b0ac0c368d070..3f2e8dee76fd66 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -479,10 +479,6 @@ foreach mx = SchedMxList in { // resource, we do not need to use LMULSEWXXX constructors. However, we do // use the SEW from the name to determine the number of Cycles. -// This predicate is true when the rs2 operand of vlse or vsse is x0, false // otherwise.
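Returning to the LoongArchMatInt hunk above: the loop maintains the invariant that HighMask holds exactly the bits strictly above Msb, so the per-iteration update is a plain left shift; the removed "+ 1" also set bit 0, corrupting the mask. A standalone check of that invariant in plain C++ (not the LLVM helper itself):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Msb = 32;
  uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1); // bits [Msb+1, 63]
  for (; Msb < 63; ++Msb, HighMask = HighMask << 1) {
    assert(HighMask == ~((1ULL << (Msb + 1)) - 1)); // invariant holds
    assert((((HighMask << 1) + 1) & 1) == 1);       // the old update set bit 0
  }
  return 0;
}

(The check stops at 63 only so the reference expression 1ULL << (Msb + 1) stays well defined.)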
-def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>; - foreach mx = SchedMxList in { defvar VLDSX0Cycles = SiFive7GetCyclesDefault.c; defvar Cycles = SiFive7GetCyclesOnePerElement.c; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 95fde1e53c805f..ee041ea142b94c 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -9,6 +9,10 @@ //===----------------------------------------------------------------------===// /// Define scheduler resources associated with def operands. +// This predicate is true when the rs2 operand of vlse or vsse is x0, false +// otherwise. +def VLDSX0Pred : MCSchedPredicate<CheckRegOperand<3, X0>>; + defvar SchedMxList = ["MF8", "MF4", "MF2", "M1", "M2", "M4", "M8"]; // Used for widening and narrowing instructions as it doesn't contain M8. defvar SchedMxListW = !listremove(SchedMxList, ["M8"]); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp index 4a75bab6b95ddc..283d93408575b5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp @@ -107,7 +107,7 @@ bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) { for (auto &Elem : reverse(Stack)) { if (MO.getReg() == Elem.Reg) { auto Depth = static_cast<unsigned>(&Elem - &Stack[0]); - LLVM_DEBUG(dbgs() << "Debug Value VReg " << MO.getReg() + LLVM_DEBUG(dbgs() << "Debug Value VReg " << printReg(MO.getReg()) << " -> Stack Relative " << Depth << "\n"); MO.ChangeToTargetIndex(WebAssembly::TI_OPERAND_STACK, Depth); // Save the DBG_VALUE instruction that defined this stackified diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index 1203b343bf24bf..3dc9cdc11eb575 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -76,7 +76,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { break; int64_t Imm = MI.getOperand(1).getImm(); - LLVM_DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() + LLVM_DEBUG(dbgs() << "Arg VReg " << printReg(MI.getOperand(0).getReg()) << " -> WAReg " << Imm << "\n"); MFI.setWAReg(MI.getOperand(0).getReg(), Imm); } @@ -95,13 +95,14 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { continue; // Handle stackified registers. if (MFI.isVRegStackified(VReg)) { - LLVM_DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " + LLVM_DEBUG(dbgs() << "VReg " << printReg(VReg) << " -> WAReg " << (INT32_MIN | NumStackRegs) << "\n"); MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++); continue; } if (MFI.getWAReg(VReg) == WebAssembly::UnusedReg) { - LLVM_DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " << CurReg << "\n"); + LLVM_DEBUG(dbgs() << "VReg " << printReg(VReg) << " -> WAReg " << CurReg + << "\n"); MFI.setWAReg(VReg, CurReg++); } } diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index 5ef08c4a2d725d..f0d75a2016363a 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -277,34 +277,10 @@ static bool runIPSCCP( // whether other functions are optimizable. SmallVector ReturnsToZap; - for (const auto &I : Solver.getTrackedRetVals()) { - Function *F = I.first; - const ValueLatticeElement &ReturnValue = I.second; - - // If there is a known constant range for the return value, add range - // attribute to the return value.
- if (ReturnValue.isConstantRange() && - !ReturnValue.getConstantRange().isSingleElement()) { - // Do not add range metadata if the return value may include undef. - if (ReturnValue.isConstantRangeIncludingUndef()) - continue; - - // Take the intersection of the existing attribute and the inferred range. - ConstantRange CR = ReturnValue.getConstantRange(); - if (F->hasRetAttribute(Attribute::Range)) - CR = CR.intersectWith(F->getRetAttribute(Attribute::Range).getRange()); - F->addRangeRetAttr(CR); - continue; - } - // Infer nonnull return attribute. - if (F->getReturnType()->isPointerTy() && ReturnValue.isNotConstant() && - ReturnValue.getNotConstant()->isNullValue() && - !F->hasRetAttribute(Attribute::NonNull)) { - F->addRetAttr(Attribute::NonNull); - continue; - } - if (F->getReturnType()->isVoidTy()) - continue; + Solver.inferReturnAttributes(); + for (const auto &[F, ReturnValue] : Solver.getTrackedRetVals()) { + assert(!F->getReturnType()->isVoidTy() && + "should not track void functions"); if (SCCPSolver::isConstant(ReturnValue) || ReturnValue.isUnknownOrUndef()) findReturnsToZap(*F, ReturnsToZap, Solver); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 57f27e6a3b7fa5..a051a568bfd62e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -51,7 +51,6 @@ class DataLayout; class DominatorTree; class GEPOperator; class GlobalVariable; -class LoopInfo; class OptimizationRemarkEmitter; class ProfileSummaryInfo; class TargetLibraryInfo; @@ -66,10 +65,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI, + ProfileSummaryInfo *PSI, const DataLayout &DL, ReversePostOrderTraversal<BasicBlock *> &RPOT) : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, DL, LI, RPOT) {} + BFI, BPI, PSI, DL, RPOT) {} virtual ~InstCombinerImpl() = default; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 866e5f8a00b52d..ad2a620081bcd9 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -48,7 +48,6 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -1812,12 +1811,10 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB) return nullptr; - // If the incoming non-constant value is reachable from the phis block, - // we'll push the operation across a loop backedge. This could result in + // Do not push the operation across a loop backedge. This could result in // an infinite combine loop, and is generally non-profitable (especially // if the operation was originally outside the loop).
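The replacement test, isBackEdge, consults backedges that the next hunk precomputes in InstCombiner::computeBackEdges(): walk the blocks in reverse post-order, and any edge into an already-visited block is a retreating edge, which for the reducible CFGs InstCombine cares about is exactly a loop backedge. A tiny standalone version of the same detection (plain C++, with integers standing in for BasicBlock pointers):

#include <cassert>
#include <set>
#include <utility>
#include <vector>

int main() {
  // CFG: 0->1, 1->3, 1->2, 2->1 (the loop); 0,1,2,3 is a valid reverse
  // post-order for this successor order.
  std::vector<std::vector<int>> Succs = {{1}, {3, 2}, {1}, {}};
  std::vector<int> RPO = {0, 1, 2, 3};
  std::set<int> Visited;
  std::set<std::pair<int, int>> BackEdges;
  for (int BB : RPO) {
    Visited.insert(BB);
    for (int Succ : Succs[BB])
      if (Visited.count(Succ))
        BackEdges.insert({BB, Succ}); // edge back into a visited block
  }
  assert((BackEdges == std::set<std::pair<int, int>>{{2, 1}}));
  return 0;
}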
- if (isPotentiallyReachable(PN->getParent(), NonSimplifiedBB, nullptr, &DT, - LI)) + if (isBackEdge(NonSimplifiedBB, PN->getParent())) return nullptr; } @@ -5390,11 +5387,23 @@ bool InstCombinerImpl::prepareWorklist(Function &F) { return MadeIRChange; } +void InstCombiner::computeBackEdges() { + // Collect backedges. + SmallPtrSet Visited; + for (BasicBlock *BB : RPOT) { + Visited.insert(BB); + for (BasicBlock *Succ : successors(BB)) + if (Visited.contains(Succ)) + BackEdges.insert({BB, Succ}); + } + ComputedBackEdges = true; +} + static bool combineInstructionsOverFunction( Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI, + BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts) { auto &DL = F.getDataLayout(); @@ -5433,7 +5442,7 @@ static bool combineInstructionsOverFunction( << F.getName() << "\n"); InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, - ORE, BFI, BPI, PSI, DL, LI, RPOT); + ORE, BFI, BPI, PSI, DL, RPOT); IC.MaxArraySizeForCombine = MaxArraySize; bool MadeChangeInThisIteration = IC.prepareWorklist(F); MadeChangeInThisIteration |= IC.run(); @@ -5470,7 +5479,6 @@ void InstCombinePass::printPipeline( OS, MapClassName2PassName); OS << '<'; OS << "max-iterations=" << Options.MaxIterations << ";"; - OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;"; OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint"; OS << '>'; } @@ -5483,12 +5491,6 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto &TTI = AM.getResult<TargetIRAnalysis>(F); - // TODO: Only use LoopInfo when the option is set. This requires that the - // callers in the pass pipeline explicitly set the option. - auto *LI = AM.getCachedResult<LoopAnalysis>(F); - if (!LI && Options.UseLoopInfo) - LI = &AM.getResult<LoopAnalysis>(F); - auto *AA = &AM.getResult<AAManager>(F); auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); ProfileSummaryInfo *PSI = @@ -5498,7 +5500,7 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F); if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, LI, Options)) + BFI, BPI, PSI, Options)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -5537,8 +5539,6 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); // Optional analyses. - auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); - auto *LI = LIWP ?
&LIWP->getLoopInfo() : nullptr; ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = @@ -5551,8 +5551,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { BPI = &WrapperPass->getBPI(); return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, LI, - InstCombineOptions()); + BFI, BPI, PSI, InstCombineOptions()); } char InstructionCombiningPass::ID = 0; diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index caf9f890418e29..0330460e7df8ab 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueLatticeUtils.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -66,6 +67,11 @@ static bool runSCCP(Function &F, const DataLayout &DL, DL, [TLI](Function &F) -> const TargetLibraryInfo & { return *TLI; }, F.getContext()); + // While we don't do any actual inter-procedural analysis, still track + // return values so we can infer attributes. + if (canTrackReturnsInterprocedurally(&F)) + Solver.addTrackedFunction(&F); + // Mark the first block of the function as being executable. Solver.markBlockExecutable(&F.front()); @@ -115,6 +121,8 @@ static bool runSCCP(Function &F, const DataLayout &DL, if (!DeadBB->hasAddressTaken()) DTU.deleteBB(DeadBB); + Solver.inferReturnAttributes(); + return MadeChanges; } diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 59775d2199ca61..56e1f90f46cfd1 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -354,6 +354,34 @@ bool SCCPSolver::removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU, return true; } +void SCCPSolver::inferReturnAttributes() const { + for (const auto &[F, ReturnValue] : getTrackedRetVals()) { + + // If there is a known constant range for the return value, add range + // attribute to the return value. + if (ReturnValue.isConstantRange() && + !ReturnValue.getConstantRange().isSingleElement()) { + // Do not add range metadata if the return value may include undef. + if (ReturnValue.isConstantRangeIncludingUndef()) + continue; + + // Take the intersection of the existing attribute and the inferred range. + ConstantRange CR = ReturnValue.getConstantRange(); + if (F->hasRetAttribute(Attribute::Range)) + CR = CR.intersectWith(F->getRetAttribute(Attribute::Range).getRange()); + F->addRangeRetAttr(CR); + continue; + } + // Infer nonnull return attribute. + if (F->getReturnType()->isPointerTy() && ReturnValue.isNotConstant() && + ReturnValue.getNotConstant()->isNullValue() && + !F->hasRetAttribute(Attribute::NonNull)) { + F->addRetAttr(Attribute::NonNull); + continue; + } + } +} + /// Helper class for SCCPSolver. This implements the instruction visitor and /// holds all the state.
class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> { @@ -2168,7 +2196,7 @@ const ValueLatticeElement &SCCPSolver::getLatticeValueFor(Value *V) const { } const MapVector<Function *, ValueLatticeElement> & -SCCPSolver::getTrackedRetVals() { +SCCPSolver::getTrackedRetVals() const { return Visitor->getTrackedRetVals(); } diff --git a/llvm/test/Analysis/BasicAA/gep-nuw-alias.ll b/llvm/test/Analysis/BasicAA/gep-nuw-alias.ll index b80a457f85176c..a5f1c1c747cc3f 100644 --- a/llvm/test/Analysis/BasicAA/gep-nuw-alias.ll +++ b/llvm/test/Analysis/BasicAA/gep-nuw-alias.ll @@ -212,3 +212,106 @@ define void @both_var_idx(ptr %p, i64 %i, i64 %j) { ret void } + +; CHECK-LABEL: add_no_nuw +; CHECK: MayAlias: i8* %gep, i8* %p +define i8 @add_no_nuw(ptr %p, i64 %n) { + store i8 3, ptr %p + + %add = add i64 %n, 1 + %gep = getelementptr nuw i8, ptr %p, i64 %add + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: add_nuw +; CHECK: NoAlias: i8* %gep, i8* %p +define i8 @add_nuw(ptr %p, i64 %n) { + store i8 3, ptr %p + + %add = add nuw i64 %n, 1 + %gep = getelementptr nuw i8, ptr %p, i64 %add + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: add_no_nuw_scale +; CHECK: MayAlias: i8* %gep, i16* %p +define i8 @add_no_nuw_scale(ptr %p, i64 %n) { + store i16 3, ptr %p + + %add = add i64 %n, 1 + %gep = getelementptr nuw i16, ptr %p, i64 %add + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: add_nuw_scale +; CHECK: NoAlias: i8* %gep, i16* %p +define i8 @add_nuw_scale(ptr %p, i64 %n) { + store i16 3, ptr %p + + %add = add nuw i64 %n, 1 + %gep = getelementptr nuw i16, ptr %p, i64 %add + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: sub_nuw +; CHECK: MayAlias: i8* %gep, i8* %p +define i8 @sub_nuw(ptr %p, i64 %n) { + store i8 3, ptr %p + + %add = sub nuw i64 %n, 1 + %gep = getelementptr nuw i8, ptr %p, i64 %add + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: mul_no_nuw +; CHECK: MayAlias: i8* %gep, i16* %p +define i8 @mul_no_nuw(ptr %p, i64 %n) { + store i16 3, ptr %p + + %add = add nuw i64 %n, 1 + %mul = mul i64 %add, 2 + %gep = getelementptr nuw i8, ptr %p, i64 %mul + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: mul_nuw +; CHECK: NoAlias: i8* %gep, i16* %p +define i8 @mul_nuw(ptr %p, i64 %n) { + store i16 3, ptr %p + + %add = add nuw i64 %n, 1 + %mul = mul nuw i64 %add, 2 + %gep = getelementptr nuw i8, ptr %p, i64 %mul + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: shl_no_nuw +; CHECK: MayAlias: i8* %gep, i16* %p +define i8 @shl_no_nuw(ptr %p, i64 %n) { + store i16 3, ptr %p + + %add = add nuw i64 %n, 1 + %shl = shl i64 %add, 1 + %gep = getelementptr nuw i8, ptr %p, i64 %shl + %val = load i8, ptr %gep + ret i8 %val +} + +; CHECK-LABEL: shl_nuw +; CHECK: NoAlias: i8* %gep, i16* %p +define i8 @shl_nuw(ptr %p, i64 %n) { + store i16 3, ptr %p + + %add = add nuw i64 %n, 1 + %shl = shl nuw i64 %add, 1 + %gep = getelementptr nuw i8, ptr %p, i64 %shl + %val = load i8, ptr %gep + ret i8 %val +} diff --git a/llvm/test/Bitcode/intrinsics-struct-upgrade-attributes.ll b/llvm/test/Bitcode/intrinsics-struct-upgrade-attributes.ll new file mode 100644 index 00000000000000..4962144899ae4b --- /dev/null +++ b/llvm/test/Bitcode/intrinsics-struct-upgrade-attributes.ll @@ -0,0 +1,18 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } + +declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) + +; CHECK-LABEL: define %struct.__neon_int8x8x2_t @test_named_struct_return(ptr %A) { +; CHECK: %1 = call { <8 x i8>, <8 x i8> }
@llvm.aarch64.neon.ld2.v8i8.p0(ptr align 16 %A) +; CHECK: %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 +; CHECK: %3 = insertvalue %struct.__neon_int8x8x2_t poison, <8 x i8> %2, 0 +; CHECK: %4 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 +; CHECK: %5 = insertvalue %struct.__neon_int8x8x2_t %3, <8 x i8> %4, 1 +; CHECK: ret %struct.__neon_int8x8x2_t %5 + +define %struct.__neon_int8x8x2_t @test_named_struct_return(ptr %A) { + %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(ptr align 16 %A) + ret %struct.__neon_int8x8x2_t %val +} diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-dlc.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-dlc.mir index f4cdedf9cf6eb8..9295bd59621039 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-dlc.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-dlc.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s # The purpose of this test is to make sure we are combining relevant memory # operations correctly with/without DLC bit. diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir index c4e131b90deb48..c0cc3e9f4edd7f 100644 --- a/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir +++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-scc.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s # The purpose of this test is to make sure we are combining relevant memory # operations correctly with/without SCC bit. diff --git a/llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll b/llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll new file mode 100644 index 00000000000000..bcaea3d0258b70 --- /dev/null +++ b/llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=LE +; RUN: llc < %s -mtriple=armv7eb-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=BE + +;; Previously, this failed during register allocation because the CMP_SWAP_64 +;; pseudo-instruction has a lot of operands, many of which need to be even-odd +;; register pairs, and the over-aligned alloca in this function causes both a +;; frame pointer and a base pointer to be needed. 
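Roughly, the C++ analogue of the IR below (an assumed mapping, for orientation only) is a seq_cst 64-bit atomic store next to an over-aligned local: at -O0 on ARMv7 the store becomes the ldrexd/strexd compare-and-swap loop seen in the checks, and the 16-byte alignment is what drags in both the frame pointer and the base pointer:

#include <atomic>

void test(std::atomic<long long> *p) {
  alignas(16) char stuff[16]; // over-aligned local => stack realignment and
  (void)stuff;                // a base pointer on top of the frame pointer
  p->store(0, std::memory_order_seq_cst); // expands to a ldrexd/strexd loop
}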
+ +define void @test(ptr %ptr) { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r8, r9, r10, r11, lr} +; CHECK-NEXT: .setfp r11, sp, #24 +; CHECK-NEXT: add r11, sp, #24 +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: bfc sp, #0, #4 +; CHECK-NEXT: mov r6, sp +; CHECK-NEXT: str r0, [r6, #28] @ 4-byte Spill +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_1: @ %block1 +; CHECK-NEXT: ldr r0, [r6, #28] @ 4-byte Reload +; CHECK-NEXT: mov r1, sp +; CHECK-NEXT: sub r1, r1, #16 +; CHECK-NEXT: bic r1, r1, #15 +; CHECK-NEXT: mov sp, r1 +; CHECK-NEXT: dmb ish +; CHECK-NEXT: ldr r1, [r0] +; CHECK-NEXT: ldr r0, [r0, #4] +; CHECK-NEXT: str r1, [r6, #20] @ 4-byte Spill +; CHECK-NEXT: str r0, [r6, #24] @ 4-byte Spill +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_2: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB0_3 Depth 2 +; CHECK-NEXT: ldr r2, [r6, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [r6, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r8, [r6, #28] @ 4-byte Reload +; LE-NEXT: str r2, [r6, #16] @ 4-byte Spill +; LE-NEXT: str r0, [r6, #12] @ 4-byte Spill +; BE-NEXT: str r2, [r6, #12] @ 4-byte Spill +; BE-NEXT: str r0, [r6, #16] @ 4-byte Spill +; CHECK-NEXT: @ implicit-def: $r1 +; CHECK-NEXT: @ implicit-def: $r3 +; CHECK-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: @ kill: def $r0 killed $r0 def $r0_r1 +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r3, r12 +; CHECK-NEXT: .LBB0_3: @ %atomicrmw.start +; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ldrexd r4, r5, [r8] +; CHECK-NEXT: cmp r4, r0 +; CHECK-NEXT: cmpeq r5, r1 +; CHECK-NEXT: bne .LBB0_5 +; CHECK-NEXT: @ %bb.4: @ %atomicrmw.start +; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=2 +; CHECK-NEXT: strexd r9, r2, r3, [r8] +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: bne .LBB0_3 +; CHECK-NEXT: .LBB0_5: @ %atomicrmw.start +; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: ldr r2, [r6, #12] @ 4-byte Reload +; LE-NEXT: ldr r1, [r6, #16] @ 4-byte Reload +; LE-NEXT: mov r0, r5 +; LE-NEXT: eor r3, r0, r1 +; LE-NEXT: mov r1, r4 +; LE-NEXT: eor r2, r1, r2 +; BE-NEXT: ldr r0, [r6, #16] @ 4-byte Reload +; BE-NEXT: mov r1, r4 +; BE-NEXT: eor r3, r1, r0 +; BE-NEXT: mov r0, r5 +; BE-NEXT: eor r2, r0, r2 +; CHECK-NEXT: orr r2, r2, r3 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: str r1, [r6, #20] @ 4-byte Spill +; CHECK-NEXT: str r0, [r6, #24] @ 4-byte Spill +; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_6: @ %atomicrmw.end +; CHECK-NEXT: dmb ish +; CHECK-NEXT: sub sp, r11, #24 +; CHECK-NEXT: pop {r4, r5, r6, r8, r9, r10, r11, pc} +entry: + br label %block1 + +block1: + %stuff = alloca i8, i64 16, align 16 + store atomic i64 0, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index c53fb2f330a792..14e49bf3c9376a 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -327,50 +327,56 @@ define void @test_old_store_64bit(ptr %p, i64 %v) { ; ARMOPTNONE-NEXT: push {r4, r5, r7, lr} ; ARMOPTNONE-NEXT: add r7, sp, #8 ; ARMOPTNONE-NEXT: push {r8, r10, r11} -; ARMOPTNONE-NEXT: sub sp, sp, #20 -; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill -; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 4-byte Spill -; 
ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARMOPTNONE-NEXT: sub sp, sp, #24 +; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill ; ARMOPTNONE-NEXT: dmb ish ; ARMOPTNONE-NEXT: ldr r1, [r0] ; ARMOPTNONE-NEXT: ldr r0, [r0, #4] -; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill -; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARMOPTNONE-NEXT: b LBB5_1 ; ARMOPTNONE-NEXT: LBB5_1: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1 ; ARMOPTNONE-NEXT: @ Child Loop BB5_2 Depth 2 -; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload -; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 -; ARMOPTNONE-NEXT: mov r11, r0 -; ARMOPTNONE-NEXT: mov r8, r2 +; ARMOPTNONE-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r12, [sp, #8] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r8, [sp, #4] @ 4-byte Reload +; ARMOPTNONE-NEXT: str r3, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: @ implicit-def: $r1 +; ARMOPTNONE-NEXT: @ implicit-def: $r9 +; ARMOPTNONE-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 ; ARMOPTNONE-NEXT: mov r9, r1 +; ARMOPTNONE-NEXT: @ kill: def $r0 killed $r0 def $r0_r1 +; ARMOPTNONE-NEXT: mov r1, r12 +; ARMOPTNONE-NEXT: mov r10, r2 +; ARMOPTNONE-NEXT: mov r11, r3 ; ARMOPTNONE-NEXT: LBB5_2: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ Parent Loop BB5_1 Depth=1 ; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2 -; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3] -; ARMOPTNONE-NEXT: cmp r4, r8 -; ARMOPTNONE-NEXT: cmpeq r5, r9 +; ARMOPTNONE-NEXT: ldrexd r4, r5, [r8] +; ARMOPTNONE-NEXT: cmp r4, r10 +; ARMOPTNONE-NEXT: cmpeq r5, r11 ; ARMOPTNONE-NEXT: bne LBB5_4 ; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ in Loop: Header=BB5_2 Depth=2 -; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3] -; ARMOPTNONE-NEXT: cmp r0, #0 +; ARMOPTNONE-NEXT: strexd r9, r0, r1, [r8] +; ARMOPTNONE-NEXT: cmp r9, #0 ; ARMOPTNONE-NEXT: bne LBB5_2 ; ARMOPTNONE-NEXT: LBB5_4: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ in Loop: Header=BB5_1 Depth=1 +; ARMOPTNONE-NEXT: ldr r1, [sp] @ 4-byte Reload ; ARMOPTNONE-NEXT: mov r0, r5 ; ARMOPTNONE-NEXT: eor r3, r0, r1 ; ARMOPTNONE-NEXT: mov r1, r4 ; ARMOPTNONE-NEXT: eor r2, r1, r2 ; ARMOPTNONE-NEXT: orr r2, r2, r3 ; ARMOPTNONE-NEXT: cmp r2, #0 -; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill -; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARMOPTNONE-NEXT: bne LBB5_1 ; ARMOPTNONE-NEXT: b LBB5_5 ; ARMOPTNONE-NEXT: LBB5_5: @ %atomicrmw.end @@ -861,52 +867,58 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { ; ARMOPTNONE-NEXT: push {r4, r5, r7, lr} ; ARMOPTNONE-NEXT: add r7, sp, #8 ; ARMOPTNONE-NEXT: push {r8, r10, r11} -; ARMOPTNONE-NEXT: sub sp, sp, #20 -; ARMOPTNONE-NEXT: str r0, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: sub sp, sp, #24 +; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill ; ARMOPTNONE-NEXT: vmov d16, r1, r2 ; ARMOPTNONE-NEXT: vmov r1, r2, d16 -; ARMOPTNONE-NEXT: str r2, [sp, #4] @ 
4-byte Spill -; ARMOPTNONE-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill ; ARMOPTNONE-NEXT: dmb ish ; ARMOPTNONE-NEXT: ldr r1, [r0] ; ARMOPTNONE-NEXT: ldr r0, [r0, #4] -; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill -; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARMOPTNONE-NEXT: b LBB13_1 ; ARMOPTNONE-NEXT: LBB13_1: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1 ; ARMOPTNONE-NEXT: @ Child Loop BB13_2 Depth 2 -; ARMOPTNONE-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r3, [sp] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; ARMOPTNONE-NEXT: ldr r10, [sp, #8] @ 4-byte Reload -; ARMOPTNONE-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 -; ARMOPTNONE-NEXT: mov r11, r0 -; ARMOPTNONE-NEXT: mov r8, r2 +; ARMOPTNONE-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r12, [sp, #8] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; ARMOPTNONE-NEXT: ldr r8, [sp, #4] @ 4-byte Reload +; ARMOPTNONE-NEXT: str r3, [sp] @ 4-byte Spill +; ARMOPTNONE-NEXT: @ implicit-def: $r1 +; ARMOPTNONE-NEXT: @ implicit-def: $r9 +; ARMOPTNONE-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 ; ARMOPTNONE-NEXT: mov r9, r1 +; ARMOPTNONE-NEXT: @ kill: def $r0 killed $r0 def $r0_r1 +; ARMOPTNONE-NEXT: mov r1, r12 +; ARMOPTNONE-NEXT: mov r10, r2 +; ARMOPTNONE-NEXT: mov r11, r3 ; ARMOPTNONE-NEXT: LBB13_2: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ Parent Loop BB13_1 Depth=1 ; ARMOPTNONE-NEXT: @ => This Inner Loop Header: Depth=2 -; ARMOPTNONE-NEXT: ldrexd r4, r5, [r3] -; ARMOPTNONE-NEXT: cmp r4, r8 -; ARMOPTNONE-NEXT: cmpeq r5, r9 +; ARMOPTNONE-NEXT: ldrexd r4, r5, [r8] +; ARMOPTNONE-NEXT: cmp r4, r10 +; ARMOPTNONE-NEXT: cmpeq r5, r11 ; ARMOPTNONE-NEXT: bne LBB13_4 ; ARMOPTNONE-NEXT: @ %bb.3: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_2 Depth=2 -; ARMOPTNONE-NEXT: strexd r0, r10, r11, [r3] -; ARMOPTNONE-NEXT: cmp r0, #0 +; ARMOPTNONE-NEXT: strexd r9, r0, r1, [r8] +; ARMOPTNONE-NEXT: cmp r9, #0 ; ARMOPTNONE-NEXT: bne LBB13_2 ; ARMOPTNONE-NEXT: LBB13_4: @ %atomicrmw.start ; ARMOPTNONE-NEXT: @ in Loop: Header=BB13_1 Depth=1 +; ARMOPTNONE-NEXT: ldr r1, [sp] @ 4-byte Reload ; ARMOPTNONE-NEXT: mov r0, r5 ; ARMOPTNONE-NEXT: eor r3, r0, r1 ; ARMOPTNONE-NEXT: mov r1, r4 ; ARMOPTNONE-NEXT: eor r2, r1, r2 ; ARMOPTNONE-NEXT: orr r2, r2, r3 ; ARMOPTNONE-NEXT: cmp r2, #0 -; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill -; ARMOPTNONE-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARMOPTNONE-NEXT: bne LBB13_1 ; ARMOPTNONE-NEXT: b LBB13_5 ; ARMOPTNONE-NEXT: LBB13_5: @ %atomicrmw.end diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll index 161692137fc30b..a38ade7cdbf06b 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll @@ -6765,8 +6765,8 @@ entry: define i64 @test_xchg_i64() { ; CHECK-ARM8-LABEL: test_xchg_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, 
lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -6781,25 +6781,29 @@ define i64 @test_xchg_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB33_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: mov r8, #1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 +; CHECK-ARM8-NEXT: mov r10, #1 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 ; CHECK-ARM8-NEXT: .LBB33_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB33_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB33_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB33_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB33_2 ; CHECK-ARM8-NEXT: .LBB33_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB33_1 Depth=1 @@ -6819,12 +6823,12 @@ define i64 @test_xchg_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_xchg_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI33_0 @@ -6838,24 +6842,28 @@ define i64 @test_xchg_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB33_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 -; CHECK-ARM6-NEXT: ldr r3, .LCPI33_0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI33_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: mov r8, #1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 +; CHECK-ARM6-NEXT: mov r10, #1 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; 
CHECK-ARM6-NEXT: mov r11, r0 ; CHECK-ARM6-NEXT: .LBB33_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB33_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB33_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB33_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB33_2 ; CHECK-ARM6-NEXT: .LBB33_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB33_1 Depth=1 @@ -6875,7 +6883,7 @@ define i64 @test_xchg_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI33_0: @@ -6883,8 +6891,8 @@ define i64 @test_xchg_i64() { ; ; CHECK-THUMB7-LABEL: test_xchg_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -6899,26 +6907,30 @@ define i64 @test_xchg_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB33_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: movs r0, #0 -; CHECK-THUMB7-NEXT: mov.w r8, #1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 +; CHECK-THUMB7-NEXT: mov.w r10, #1 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 ; CHECK-THUMB7-NEXT: .LBB33_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB33_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB33_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB33_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB33_2 ; CHECK-THUMB7-NEXT: .LBB33_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB33_1 Depth=1 @@ 
-6938,7 +6950,7 @@ define i64 @test_xchg_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_xchg_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -6975,8 +6987,8 @@ entry: define i64 @test_add_i64() { ; CHECK-ARM8-LABEL: test_add_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -6991,25 +7003,29 @@ define i64 @test_add_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB34_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 -; CHECK-ARM8-NEXT: adds r8, r2, #1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 +; CHECK-ARM8-NEXT: adds r10, r2, #1 ; CHECK-ARM8-NEXT: adc r0, r1, #0 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB34_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB34_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB34_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB34_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB34_2 ; CHECK-ARM8-NEXT: .LBB34_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB34_1 Depth=1 @@ -7029,12 +7045,12 @@ define i64 @test_add_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_add_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI34_0 @@ -7048,24 +7064,28 @@ define i64 @test_add_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB34_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload 
-; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 -; CHECK-ARM6-NEXT: adds r8, r2, #1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 +; CHECK-ARM6-NEXT: adds r10, r2, #1 ; CHECK-ARM6-NEXT: adc r0, r1, #0 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI34_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI34_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB34_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB34_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB34_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB34_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB34_2 ; CHECK-ARM6-NEXT: .LBB34_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB34_1 Depth=1 @@ -7085,7 +7105,7 @@ define i64 @test_add_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI34_0: @@ -7093,8 +7113,8 @@ define i64 @test_add_i64() { ; ; CHECK-THUMB7-LABEL: test_add_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -7109,26 +7129,30 @@ define i64 @test_add_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB34_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: adds.w r8, r2, #1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: adds.w r10, r2, #1 ; CHECK-THUMB7-NEXT: adc r0, r1, #0 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB34_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB34_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; 
CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB34_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB34_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB34_2 ; CHECK-THUMB7-NEXT: .LBB34_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB34_1 Depth=1 @@ -7148,7 +7172,7 @@ define i64 @test_add_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_add_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -7185,8 +7209,8 @@ entry: define i64 @test_sub_i64() { ; CHECK-ARM8-LABEL: test_sub_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -7201,25 +7225,29 @@ define i64 @test_sub_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB35_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 -; CHECK-ARM8-NEXT: subs r8, r2, #1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 +; CHECK-ARM8-NEXT: subs r10, r2, #1 ; CHECK-ARM8-NEXT: sbc r0, r1, #0 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB35_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB35_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB35_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB35_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB35_2 ; CHECK-ARM8-NEXT: .LBB35_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB35_1 Depth=1 @@ -7239,12 +7267,12 @@ define i64 @test_sub_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, 
r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_sub_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI35_0 @@ -7258,24 +7286,28 @@ define i64 @test_sub_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB35_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 -; CHECK-ARM6-NEXT: subs r8, r2, #1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 +; CHECK-ARM6-NEXT: subs r10, r2, #1 ; CHECK-ARM6-NEXT: sbc r0, r1, #0 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI35_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI35_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB35_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB35_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB35_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB35_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB35_2 ; CHECK-ARM6-NEXT: .LBB35_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB35_1 Depth=1 @@ -7295,7 +7327,7 @@ define i64 @test_sub_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI35_0: @@ -7303,8 +7335,8 @@ define i64 @test_sub_i64() { ; ; CHECK-THUMB7-LABEL: test_sub_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -7319,26 +7351,30 @@ define i64 @test_sub_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB35_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: subs.w r8, r2, #1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: subs.w r10, r2, #1 ; CHECK-THUMB7-NEXT: sbc r0, r1, #0 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, 
:lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB35_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB35_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB35_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB35_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB35_2 ; CHECK-THUMB7-NEXT: .LBB35_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB35_1 Depth=1 @@ -7358,7 +7394,7 @@ define i64 @test_sub_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_sub_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -7395,8 +7431,8 @@ entry: define i64 @test_and_i64() { ; CHECK-ARM8-LABEL: test_and_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -7411,25 +7447,29 @@ define i64 @test_and_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB36_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: and r8, r2, #1 +; CHECK-ARM8-NEXT: and r10, r2, #1 ; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB36_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB36_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB36_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ 
%atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB36_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB36_2 ; CHECK-ARM8-NEXT: .LBB36_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB36_1 Depth=1 @@ -7449,12 +7489,12 @@ define i64 @test_and_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_and_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI36_0 @@ -7468,24 +7508,28 @@ define i64 @test_and_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB36_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: and r8, r2, #1 +; CHECK-ARM6-NEXT: and r10, r2, #1 ; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 -; CHECK-ARM6-NEXT: ldr r3, .LCPI36_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 +; CHECK-ARM6-NEXT: ldr r6, .LCPI36_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB36_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB36_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB36_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB36_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB36_2 ; CHECK-ARM6-NEXT: .LBB36_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB36_1 Depth=1 @@ -7505,7 +7549,7 @@ define i64 @test_and_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI36_0: @@ -7513,8 +7557,8 @@ define i64 @test_and_i64() { ; ; CHECK-THUMB7-LABEL: test_and_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; 
CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -7529,26 +7573,30 @@ define i64 @test_and_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB36_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: and r8, r2, #1 +; CHECK-THUMB7-NEXT: and r10, r2, #1 ; CHECK-THUMB7-NEXT: movs r0, #0 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB36_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB36_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB36_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB36_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB36_2 ; CHECK-THUMB7-NEXT: .LBB36_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB36_1 Depth=1 @@ -7568,7 +7616,7 @@ define i64 @test_and_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_and_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -7605,8 +7653,8 @@ entry: define i64 @test_nand_i64() { ; CHECK-ARM8-LABEL: test_nand_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -7621,27 +7669,31 @@ define i64 @test_nand_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB37_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: mvn r0, r2 ; CHECK-ARM8-NEXT: mvn r3, #1 -; CHECK-ARM8-NEXT: orr r8, r0, r3 +; CHECK-ARM8-NEXT: orr r10, r0, r3 ; CHECK-ARM8-NEXT: mvn r0, #0 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; 
CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB37_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB37_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB37_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB37_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB37_2 ; CHECK-ARM8-NEXT: .LBB37_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB37_1 Depth=1 @@ -7661,12 +7713,12 @@ define i64 @test_nand_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_nand_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI37_0 @@ -7680,26 +7732,30 @@ define i64 @test_nand_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB37_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: mvn r0, r2 ; CHECK-ARM6-NEXT: mvn r3, #1 -; CHECK-ARM6-NEXT: orr r8, r0, r3 +; CHECK-ARM6-NEXT: orr r10, r0, r3 ; CHECK-ARM6-NEXT: mvn r0, #0 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI37_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI37_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB37_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB37_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB37_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB37_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB37_2 ; CHECK-ARM6-NEXT: .LBB37_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB37_1 Depth=1 @@ -7719,7 +7775,7 @@ define i64 @test_nand_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte 
Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI37_0: @@ -7727,8 +7783,8 @@ define i64 @test_nand_i64() { ; ; CHECK-THUMB7-LABEL: test_nand_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -7743,27 +7799,31 @@ define i64 @test_nand_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB37_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: mvn r0, #1 -; CHECK-THUMB7-NEXT: orn r8, r0, r2 +; CHECK-THUMB7-NEXT: orn r10, r0, r2 ; CHECK-THUMB7-NEXT: mov.w r0, #-1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB37_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB37_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB37_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB37_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB37_2 ; CHECK-THUMB7-NEXT: .LBB37_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB37_1 Depth=1 @@ -7783,7 +7843,7 @@ define i64 @test_nand_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_nand_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -7820,8 +7880,8 @@ entry: define i64 @test_or_i64() { ; CHECK-ARM8-LABEL: test_or_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, 
:lower16:atomic_i64 @@ -7836,24 +7896,28 @@ define i64 @test_or_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB38_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: orr r8, r2, #1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: orr r10, r2, #1 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r1 +; CHECK-ARM8-NEXT: mov r8, r2 ; CHECK-ARM8-NEXT: mov r9, r1 -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB38_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB38_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB38_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB38_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB38_2 ; CHECK-ARM8-NEXT: .LBB38_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB38_1 Depth=1 @@ -7873,12 +7937,12 @@ define i64 @test_or_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_or_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI38_0 @@ -7892,23 +7956,27 @@ define i64 @test_or_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB38_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: orr r8, r2, #1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: orr r10, r2, #1 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r1 +; CHECK-ARM6-NEXT: mov r8, r2 ; CHECK-ARM6-NEXT: mov r9, r1 -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 -; CHECK-ARM6-NEXT: ldr r3, .LCPI38_0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI38_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB38_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB38_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 
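In the or/xor hunks that follow, the same renaming applies, with one extra detail visible in the checks: or/xor with a 32-bit immediate leaves the high word unchanged, so r11 (the high half of the new value) is written directly from r1 rather than computed. A rough sketch of the outer loop these checks exercise, under the assumption that it is the generic atomicrmw-to-cmpxchg expansion; block and value names are illustrative:

; Conceptual IR for the outer %atomicrmw.start loop: retry the
; compare-and-swap with a freshly observed value until it succeeds.
@atomic_i64 = global i64 0

define i64 @sketch_or_loop() {
entry:
  %init = load i64, ptr @atomic_i64, align 8
  br label %atomicrmw.start

atomicrmw.start:
  %loaded = phi i64 [ %init, %entry ], [ %new.loaded, %atomicrmw.start ]
  %new = or i64 %loaded, 1
  %pair = cmpxchg ptr @atomic_i64, i64 %loaded, i64 %new monotonic monotonic
  %new.loaded = extractvalue { i64, i1 } %pair, 0
  %success = extractvalue { i64, i1 } %pair, 1
  br i1 %success, label %atomicrmw.end, label %atomicrmw.start

atomicrmw.end:
  ; on the successful iteration %new.loaded equals the old value
  ret i64 %new.loaded
}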
+; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB38_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB38_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB38_2 ; CHECK-ARM6-NEXT: .LBB38_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB38_1 Depth=1 @@ -7928,7 +7996,7 @@ define i64 @test_or_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI38_0: @@ -7936,8 +8004,8 @@ define i64 @test_or_i64() { ; ; CHECK-THUMB7-LABEL: test_or_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -7952,25 +8020,29 @@ define i64 @test_or_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB38_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: orr r8, r2, #1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB7-NEXT: orr r10, r2, #1 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r1 +; CHECK-THUMB7-NEXT: mov r8, r2 ; CHECK-THUMB7-NEXT: mov r9, r1 -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB38_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB38_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB38_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB38_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB38_2 ; CHECK-THUMB7-NEXT: .LBB38_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB38_1 Depth=1 @@ -7990,7 +8062,7 @@ define i64 @test_or_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_or_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -8027,8 +8099,8 @@ entry: define i64 
@test_xor_i64() { ; CHECK-ARM8-LABEL: test_xor_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -8043,24 +8115,28 @@ define i64 @test_xor_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB39_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: eor r8, r2, #1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM8-NEXT: eor r10, r2, #1 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r1 +; CHECK-ARM8-NEXT: mov r8, r2 ; CHECK-ARM8-NEXT: mov r9, r1 -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB39_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB39_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB39_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB39_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB39_2 ; CHECK-ARM8-NEXT: .LBB39_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB39_1 Depth=1 @@ -8080,12 +8156,12 @@ define i64 @test_xor_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_xor_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI39_0 @@ -8099,23 +8175,27 @@ define i64 @test_xor_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB39_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: eor r8, r2, #1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-ARM6-NEXT: eor r10, r2, #1 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r1 +; CHECK-ARM6-NEXT: mov r8, r2 ; CHECK-ARM6-NEXT: mov r9, r1 -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 -; CHECK-ARM6-NEXT: ldr r3, .LCPI39_0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI39_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ 
implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB39_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB39_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB39_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB39_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB39_2 ; CHECK-ARM6-NEXT: .LBB39_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB39_1 Depth=1 @@ -8135,7 +8215,7 @@ define i64 @test_xor_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI39_0: @@ -8143,8 +8223,8 @@ define i64 @test_xor_i64() { ; ; CHECK-THUMB7-LABEL: test_xor_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -8159,25 +8239,29 @@ define i64 @test_xor_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB39_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: eor r8, r2, #1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 +; CHECK-THUMB7-NEXT: eor r10, r2, #1 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r1 +; CHECK-THUMB7-NEXT: mov r8, r2 ; CHECK-THUMB7-NEXT: mov r9, r1 -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB39_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB39_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB39_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB39_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB39_2 ; CHECK-THUMB7-NEXT: .LBB39_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB39_1 
Depth=1 @@ -8197,7 +8281,7 @@ define i64 @test_xor_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_xor_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -8235,8 +8319,8 @@ entry: define i64 @test_max_i64() { ; CHECK-ARM8-LABEL: test_max_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -8251,32 +8335,36 @@ define i64 @test_max_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB40_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: rsbs r0, r2, #1 ; CHECK-ARM8-NEXT: rscs r0, r1, #0 ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlt r0, #1 -; CHECK-ARM8-NEXT: mov r8, #1 +; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r8, r2 +; CHECK-ARM8-NEXT: movne r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB40_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB40_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB40_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB40_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB40_2 ; CHECK-ARM8-NEXT: .LBB40_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB40_1 Depth=1 @@ -8296,12 +8384,12 @@ define i64 @test_max_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_max_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: 
sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI40_0 @@ -8315,31 +8403,35 @@ define i64 @test_max_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB40_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: rsbs r0, r2, #1 ; CHECK-ARM6-NEXT: rscs r0, r1, #0 ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlt r0, #1 -; CHECK-ARM6-NEXT: mov r8, #1 +; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r8, r2 +; CHECK-ARM6-NEXT: movne r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI40_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI40_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB40_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB40_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB40_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB40_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB40_2 ; CHECK-ARM6-NEXT: .LBB40_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB40_1 Depth=1 @@ -8359,7 +8451,7 @@ define i64 @test_max_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI40_0: @@ -8367,8 +8459,8 @@ define i64 @test_max_i64() { ; ; CHECK-THUMB7-LABEL: test_max_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -8388,31 +8480,35 @@ define i64 @test_max_i64() { ; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r0, #1 -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: mov.w r8, #1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r8, r2 +; CHECK-THUMB7-NEXT: movne r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; 
CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB40_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB40_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB40_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB40_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB40_2 ; CHECK-THUMB7-NEXT: .LBB40_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB40_1 Depth=1 @@ -8432,7 +8528,7 @@ define i64 @test_max_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_max_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -8539,8 +8635,8 @@ entry: define i64 @test_min_i64() { ; CHECK-ARM8-LABEL: test_min_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -8555,32 +8651,36 @@ define i64 @test_min_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB41_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: subs r0, r2, #2 ; CHECK-ARM8-NEXT: sbcs r0, r1, #0 ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlt r0, #1 -; CHECK-ARM8-NEXT: mov r8, #1 +; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r8, r2 +; CHECK-ARM8-NEXT: movne r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB41_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB41_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, 
[r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB41_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB41_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB41_2 ; CHECK-ARM8-NEXT: .LBB41_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB41_1 Depth=1 @@ -8600,12 +8700,12 @@ define i64 @test_min_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_min_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI41_0 @@ -8619,31 +8719,35 @@ define i64 @test_min_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB41_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: subs r0, r2, #2 ; CHECK-ARM6-NEXT: sbcs r0, r1, #0 ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlt r0, #1 -; CHECK-ARM6-NEXT: mov r8, #1 +; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r8, r2 +; CHECK-ARM6-NEXT: movne r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI41_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI41_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB41_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB41_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB41_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB41_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB41_2 ; CHECK-ARM6-NEXT: .LBB41_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB41_1 Depth=1 @@ -8663,7 +8767,7 @@ define i64 @test_min_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: 
@ %bb.6: ; CHECK-ARM6-NEXT: .LCPI41_0: @@ -8671,8 +8775,8 @@ define i64 @test_min_i64() { ; ; CHECK-THUMB7-LABEL: test_min_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -8687,36 +8791,40 @@ define i64 @test_min_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB41_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: subs r0, r2, #2 ; CHECK-THUMB7-NEXT: sbcs r0, r1, #0 ; CHECK-THUMB7-NEXT: mov.w r0, #0 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r0, #1 -; CHECK-THUMB7-NEXT: mov.w r8, #1 +; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r8, r2 +; CHECK-THUMB7-NEXT: movne r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB41_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB41_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB41_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB41_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB41_2 ; CHECK-THUMB7-NEXT: .LBB41_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB41_1 Depth=1 @@ -8736,7 +8844,7 @@ define i64 @test_min_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_min_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -8843,8 +8951,8 @@ entry: define i64 @test_umax_i64() { ; CHECK-ARM8-LABEL: test_umax_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub 
sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -8859,32 +8967,36 @@ define i64 @test_umax_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB42_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: rsbs r0, r2, #1 ; CHECK-ARM8-NEXT: rscs r0, r1, #0 ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlo r0, #1 -; CHECK-ARM8-NEXT: mov r8, #1 +; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r8, r2 +; CHECK-ARM8-NEXT: movne r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB42_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB42_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB42_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB42_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB42_2 ; CHECK-ARM8-NEXT: .LBB42_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB42_1 Depth=1 @@ -8904,12 +9016,12 @@ define i64 @test_umax_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_umax_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI42_0 @@ -8923,31 +9035,35 @@ define i64 @test_umax_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB42_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: rsbs r0, r2, #1 ; CHECK-ARM6-NEXT: rscs r0, r1, #0 ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlo r0, #1 -; CHECK-ARM6-NEXT: mov r8, #1 +; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r8, r2 +; CHECK-ARM6-NEXT: movne r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; 
CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI42_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI42_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB42_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB42_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB42_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB42_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB42_2 ; CHECK-ARM6-NEXT: .LBB42_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB42_1 Depth=1 @@ -8967,7 +9083,7 @@ define i64 @test_umax_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI42_0: @@ -8975,8 +9091,8 @@ define i64 @test_umax_i64() { ; ; CHECK-THUMB7-LABEL: test_umax_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -8996,31 +9112,35 @@ define i64 @test_umax_i64() { ; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r0, #1 -; CHECK-THUMB7-NEXT: mov r6, r2 -; CHECK-THUMB7-NEXT: mov r7, r1 -; CHECK-THUMB7-NEXT: mov.w r8, #1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r8, r2 +; CHECK-THUMB7-NEXT: movne r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB42_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB42_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; 
CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB42_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB42_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB42_2 ; CHECK-THUMB7-NEXT: .LBB42_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB42_1 Depth=1 @@ -9040,7 +9160,7 @@ define i64 @test_umax_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_umax_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry @@ -9147,8 +9267,8 @@ entry: define i64 @test_umin_i64() { ; CHECK-ARM8-LABEL: test_umin_i64: ; CHECK-ARM8: @ %bb.0: @ %entry -; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM8-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM8-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM8-NEXT: .pad #16 ; CHECK-ARM8-NEXT: sub sp, sp, #16 ; CHECK-ARM8-NEXT: movw r0, :lower16:atomic_i64 @@ -9163,32 +9283,36 @@ define i64 @test_umin_i64() { ; CHECK-ARM8-NEXT: @ Child Loop BB43_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r6, r2 -; CHECK-ARM8-NEXT: mov r7, r1 +; CHECK-ARM8-NEXT: mov r8, r2 +; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: subs r0, r2, #2 ; CHECK-ARM8-NEXT: sbcs r0, r1, #0 ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlo r0, #1 -; CHECK-ARM8-NEXT: mov r8, #1 +; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r8, r2 +; CHECK-ARM8-NEXT: movne r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 -; CHECK-ARM8-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM8-NEXT: mov r9, r0 -; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM8-NEXT: mov r11, r0 +; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-ARM8-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-ARM8-NEXT: @ implicit-def: $r0 +; CHECK-ARM8-NEXT: @ implicit-def: $r3 +; CHECK-ARM8-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM8-NEXT: mov r7, r0 ; CHECK-ARM8-NEXT: .LBB43_2: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ Parent Loop BB43_1 Depth=1 ; CHECK-ARM8-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM8-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM8-NEXT: cmp r4, r6 -; CHECK-ARM8-NEXT: cmpeq r5, r7 +; CHECK-ARM8-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM8-NEXT: cmp r4, r8 +; CHECK-ARM8-NEXT: cmpeq r5, r9 ; CHECK-ARM8-NEXT: bne .LBB43_4 ; CHECK-ARM8-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB43_2 Depth=2 -; CHECK-ARM8-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM8-NEXT: cmp r0, #0 +; CHECK-ARM8-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM8-NEXT: cmp r7, #0 ; CHECK-ARM8-NEXT: bne .LBB43_2 ; CHECK-ARM8-NEXT: .LBB43_4: @ %atomicrmw.start ; CHECK-ARM8-NEXT: @ in Loop: Header=BB43_1 Depth=1 @@ -9208,12 +9332,12 @@ define i64 @test_umin_i64() { ; CHECK-ARM8-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM8-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: add sp, sp, #16 -; CHECK-ARM8-NEXT: 
pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM8-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-ARM6-LABEL: test_umin_i64: ; CHECK-ARM6: @ %bb.0: @ %entry -; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM6-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-ARM6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-ARM6-NEXT: .pad #16 ; CHECK-ARM6-NEXT: sub sp, sp, #16 ; CHECK-ARM6-NEXT: ldr r0, .LCPI43_0 @@ -9227,31 +9351,35 @@ define i64 @test_umin_i64() { ; CHECK-ARM6-NEXT: @ Child Loop BB43_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r6, r2 -; CHECK-ARM6-NEXT: mov r7, r1 +; CHECK-ARM6-NEXT: mov r8, r2 +; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: subs r0, r2, #2 ; CHECK-ARM6-NEXT: sbcs r0, r1, #0 ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlo r0, #1 -; CHECK-ARM6-NEXT: mov r8, #1 +; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r8, r2 +; CHECK-ARM6-NEXT: movne r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 -; CHECK-ARM6-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-ARM6-NEXT: mov r9, r0 -; CHECK-ARM6-NEXT: ldr r3, .LCPI43_0 +; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-ARM6-NEXT: mov r11, r0 +; CHECK-ARM6-NEXT: ldr r6, .LCPI43_0 +; CHECK-ARM6-NEXT: @ implicit-def: $r0 +; CHECK-ARM6-NEXT: @ implicit-def: $r3 +; CHECK-ARM6-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-ARM6-NEXT: mov r7, r0 ; CHECK-ARM6-NEXT: .LBB43_2: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ Parent Loop BB43_1 Depth=1 ; CHECK-ARM6-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-ARM6-NEXT: ldrexd r4, r5, [r3] -; CHECK-ARM6-NEXT: cmp r4, r6 -; CHECK-ARM6-NEXT: cmpeq r5, r7 +; CHECK-ARM6-NEXT: ldrexd r4, r5, [r6] +; CHECK-ARM6-NEXT: cmp r4, r8 +; CHECK-ARM6-NEXT: cmpeq r5, r9 ; CHECK-ARM6-NEXT: bne .LBB43_4 ; CHECK-ARM6-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB43_2 Depth=2 -; CHECK-ARM6-NEXT: strexd r0, r8, r9, [r3] -; CHECK-ARM6-NEXT: cmp r0, #0 +; CHECK-ARM6-NEXT: strexd r7, r10, r11, [r6] +; CHECK-ARM6-NEXT: cmp r7, #0 ; CHECK-ARM6-NEXT: bne .LBB43_2 ; CHECK-ARM6-NEXT: .LBB43_4: @ %atomicrmw.start ; CHECK-ARM6-NEXT: @ in Loop: Header=BB43_1 Depth=1 @@ -9271,7 +9399,7 @@ define i64 @test_umin_i64() { ; CHECK-ARM6-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-ARM6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: add sp, sp, #16 -; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-ARM6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-ARM6-NEXT: .p2align 2 ; CHECK-ARM6-NEXT: @ %bb.6: ; CHECK-ARM6-NEXT: .LCPI43_0: @@ -9279,8 +9407,8 @@ define i64 @test_umin_i64() { ; ; CHECK-THUMB7-LABEL: test_umin_i64: ; CHECK-THUMB7: @ %bb.0: @ %entry -; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-THUMB7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-THUMB7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-THUMB7-NEXT: .pad #16 ; CHECK-THUMB7-NEXT: sub sp, #16 ; CHECK-THUMB7-NEXT: movw r0, :lower16:atomic_i64 @@ -9295,36 +9423,40 @@ define i64 @test_umin_i64() { ; CHECK-THUMB7-NEXT: @ Child Loop BB43_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov r6, r2 -; 
CHECK-THUMB7-NEXT: mov r7, r1 +; CHECK-THUMB7-NEXT: mov r8, r2 +; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: subs r0, r2, #2 ; CHECK-THUMB7-NEXT: sbcs r0, r1, #0 ; CHECK-THUMB7-NEXT: mov.w r0, #0 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r0, #1 -; CHECK-THUMB7-NEXT: mov.w r8, #1 +; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r8, r2 +; CHECK-THUMB7-NEXT: movne r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 -; CHECK-THUMB7-NEXT: @ kill: def $r8 killed $r8 def $r8_r9 -; CHECK-THUMB7-NEXT: mov r9, r0 -; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i64 -; CHECK-THUMB7-NEXT: movt r3, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 +; CHECK-THUMB7-NEXT: mov r11, r0 +; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 +; CHECK-THUMB7-NEXT: movt r6, :upper16:atomic_i64 +; CHECK-THUMB7-NEXT: @ implicit-def: $r0 +; CHECK-THUMB7-NEXT: @ implicit-def: $r3 +; CHECK-THUMB7-NEXT: @ kill: def $r6 killed $r6 def $r6_r7 +; CHECK-THUMB7-NEXT: mov r7, r0 ; CHECK-THUMB7-NEXT: .LBB43_2: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ Parent Loop BB43_1 Depth=1 ; CHECK-THUMB7-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r3] -; CHECK-THUMB7-NEXT: cmp r4, r6 +; CHECK-THUMB7-NEXT: ldrexd r4, r5, [r6] +; CHECK-THUMB7-NEXT: cmp r4, r8 ; CHECK-THUMB7-NEXT: it eq -; CHECK-THUMB7-NEXT: cmpeq r5, r7 +; CHECK-THUMB7-NEXT: cmpeq r5, r9 ; CHECK-THUMB7-NEXT: bne .LBB43_4 ; CHECK-THUMB7-NEXT: @ %bb.3: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB43_2 Depth=2 -; CHECK-THUMB7-NEXT: strexd r0, r8, r9, [r3] -; CHECK-THUMB7-NEXT: cmp r0, #0 +; CHECK-THUMB7-NEXT: strexd r7, r10, r11, [r6] +; CHECK-THUMB7-NEXT: cmp r7, #0 ; CHECK-THUMB7-NEXT: bne .LBB43_2 ; CHECK-THUMB7-NEXT: .LBB43_4: @ %atomicrmw.start ; CHECK-THUMB7-NEXT: @ in Loop: Header=BB43_1 Depth=1 @@ -9344,7 +9476,7 @@ define i64 @test_umin_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: add sp, #16 -; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-THUMB7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-THUMB6-LABEL: test_umin_i64: ; CHECK-THUMB6: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll index 28a64db1aeba4f..9158ae0c9fe159 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll @@ -78,15 +78,14 @@ define { i32, i1 } @test_cmpxchg_32(ptr %addr, i32 %desired, i32 %new) nounwind define { i64, i1 } @test_cmpxchg_64(ptr %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: -; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [[[ADDR]]] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] ; CHECK: cmp [[OLDLO]], r6 ; CHECK: cmpeq [[OLDHI]], r7 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:[lr0-9]+]], r8, r9, [r1] +; CHECK: strexd [[STATUS:[lr0-9]+]], r8, r9, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: diff --git a/llvm/test/CodeGen/ARM/cmpxchg.mir b/llvm/test/CodeGen/ARM/cmpxchg.mir index 20ab787fb4575b..2ef3281ca733e7 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg.mir +++ b/llvm/test/CodeGen/ARM/cmpxchg.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been 
autogenerated by utils/update_mir_test_checks.py # RUN: llc -o - %s -mtriple=armv7-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s +# RUN: llc -o - %s -mtriple=armv7eb-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s --- name: func tracksRegLiveness: true @@ -12,23 +13,23 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $r4_r5, $r3 + ; CHECK-NEXT: liveins: $r4_r5, $r2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $r0_r1 = LDREXD $r3, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r0_r1 = LDREXD $r2, 14 /* CC::al */, $noreg ; CHECK-NEXT: CMPrr killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NEXT: CMPrr killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr ; CHECK-NEXT: Bcc %bb.3, 1 /* CC::ne */, killed $cpsr ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: liveins: $r4_r5, $r3 + ; CHECK-NEXT: liveins: $r4_r5, $r2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber $r2 = STREXD $r4_r5, $r3, 14 /* CC::al */, $noreg - ; CHECK-NEXT: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: early-clobber $r3 = STREXD $r4_r5, $r2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: CMPri killed $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NEXT: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .3: - dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64)) + dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2_r3 = CMP_SWAP_64 killed renamable $r2_r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64)) ... 
--- name: func2 diff --git a/llvm/test/CodeGen/RISCV/zfbfmin.ll b/llvm/test/CodeGen/RISCV/zfbfmin.ll deleted file mode 100644 index f120185bbec003..00000000000000 --- a/llvm/test/CodeGen/RISCV/zfbfmin.ll +++ /dev/null @@ -1,92 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zfbfmin -verify-machineinstrs \ -; RUN: -target-abi ilp32d < %s | FileCheck -check-prefix=CHECKIZFBFMIN %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin -verify-machineinstrs \ -; RUN: -target-abi lp64d < %s | FileCheck -check-prefix=CHECKIZFBFMIN %s - -define bfloat @bitcast_bf16_i16(i16 %a) nounwind { -; CHECKIZFBFMIN-LABEL: bitcast_bf16_i16: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fmv.h.x fa0, a0 -; CHECKIZFBFMIN-NEXT: ret - %1 = bitcast i16 %a to bfloat - ret bfloat %1 -} - -define i16 @bitcast_i16_bf16(bfloat %a) nounwind { -; CHECKIZFBFMIN-LABEL: bitcast_i16_bf16: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fmv.x.h a0, fa0 -; CHECKIZFBFMIN-NEXT: ret - %1 = bitcast bfloat %a to i16 - ret i16 %1 -} - -define bfloat @fcvt_bf16_s(float %a) nounwind { -; CHECKIZFBFMIN-LABEL: fcvt_bf16_s: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fcvt.bf16.s fa0, fa0 -; CHECKIZFBFMIN-NEXT: ret - %1 = fptrunc float %a to bfloat - ret bfloat %1 -} - -define float @fcvt_s_bf16(bfloat %a) nounwind { -; CHECKIZFBFMIN-LABEL: fcvt_s_bf16: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0 -; CHECKIZFBFMIN-NEXT: ret - %1 = fpext bfloat %a to float - ret float %1 -} - -define bfloat @fcvt_bf16_d(double %a) nounwind { -; CHECKIZFBFMIN-LABEL: fcvt_bf16_d: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fcvt.s.d fa5, fa0 -; CHECKIZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5 -; CHECKIZFBFMIN-NEXT: ret - %1 = fptrunc double %a to bfloat - ret bfloat %1 -} - -define double @fcvt_d_bf16(bfloat %a) nounwind { -; CHECKIZFBFMIN-LABEL: fcvt_d_bf16: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECKIZFBFMIN-NEXT: fcvt.d.s fa0, fa5 -; CHECKIZFBFMIN-NEXT: ret - %1 = fpext bfloat %a to double - ret double %1 -} - -define bfloat @bfloat_load(ptr %a) nounwind { -; CHECKIZFBFMIN-LABEL: bfloat_load: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: flh fa0, 6(a0) -; CHECKIZFBFMIN-NEXT: ret - %1 = getelementptr bfloat, ptr %a, i32 3 - %2 = load bfloat, ptr %1 - ret bfloat %2 -} - -define bfloat @bfloat_imm() nounwind { -; CHECKIZFBFMIN-LABEL: bfloat_imm: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: lui a0, %hi(.LCPI7_0) -; CHECKIZFBFMIN-NEXT: flh fa0, %lo(.LCPI7_0)(a0) -; CHECKIZFBFMIN-NEXT: ret - ret bfloat 3.0 -} - -define dso_local void @bfloat_store(ptr %a, bfloat %b) nounwind { -; CHECKIZFBFMIN-LABEL: bfloat_store: -; CHECKIZFBFMIN: # %bb.0: -; CHECKIZFBFMIN-NEXT: fsh fa0, 0(a0) -; CHECKIZFBFMIN-NEXT: fsh fa0, 16(a0) -; CHECKIZFBFMIN-NEXT: ret - store bfloat %b, ptr %a - %1 = getelementptr bfloat, ptr %a, i32 8 - store bfloat %b, ptr %1 - ret void -} diff --git a/llvm/test/CodeGen/Thumb2/cmpxchg.mir b/llvm/test/CodeGen/Thumb2/cmpxchg.mir index 33de25d469a757..c1adb465380f8e 100644 --- a/llvm/test/CodeGen/Thumb2/cmpxchg.mir +++ b/llvm/test/CodeGen/Thumb2/cmpxchg.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -o - %s -mtriple=thumbv7-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s +# RUN: llc -o - %s -mtriple=thumbv7eb-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s --- name: func 
tracksRegLiveness: true @@ -12,23 +13,23 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .1: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $r4, $r5, $r3 + ; CHECK-NEXT: liveins: $r4, $r5, $r2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $r0, $r1 = t2LDREXD $r3, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r0, $r1 = t2LDREXD $r2, 14 /* CC::al */, $noreg ; CHECK-NEXT: tCMPhir killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NEXT: tCMPhir killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr ; CHECK-NEXT: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .2: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: liveins: $r4, $r5, $r3 + ; CHECK-NEXT: liveins: $r4, $r5, $r2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber $r2 = t2STREXD $r4, $r5, $r3, 14 /* CC::al */, $noreg - ; CHECK-NEXT: t2CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: early-clobber $r3 = t2STREXD $r4, $r5, $r2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: t2CMPri killed $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr ; CHECK-NEXT: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: .3: - dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64)) + dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2_r3 = CMP_SWAP_64 killed renamable $r2_r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64)) ... --- name: func2 diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll index 233016ff929460..6e67bd444963db 100644 --- a/llvm/test/Other/new-pm-print-pipeline.ll +++ b/llvm/test/Other/new-pm-print-pipeline.ll @@ -92,8 +92,8 @@ ; CHECK-27: function(separate-const-offset-from-gep) ;; Test InstCombine options - the first pass checks default settings, and the second checks customized options. 
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(instcombine,instcombine)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-28 -; CHECK-28: function(instcombine,instcombine) +; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(instcombine,instcombine)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-28 +; CHECK-28: function(instcombine,instcombine) ;; Test function-attrs ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='cgscc(function-attrs)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-29 diff --git a/llvm/test/Transforms/CodeGenPrepare/revert-constant-ptr-propagation-on-calls.ll b/llvm/test/Transforms/CodeGenPrepare/revert-constant-ptr-propagation-on-calls.ll new file mode 100644 index 00000000000000..8b9e60214057d6 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/revert-constant-ptr-propagation-on-calls.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +; REQUIRES: aarch64-registered-target + +%struct.S = type { i8 } +%struct.X = type { i32 } + +@g_getS = internal global %struct.S zeroinitializer, align 1 +@g_getX = internal global %struct.X zeroinitializer, align 1 +@guard = internal global i64 0, align 8 + +declare ptr @getS_dec() +declare extern_weak dllimport ptr @getS_dllimport_function() + +define ptr @getS() personality ptr @__gxx_personality_v0 { +entry: + %guard = load atomic i8, ptr @guard acquire, align 8 + %mask = and i8 %guard, 1 + %cond = icmp eq i8 %mask, 0 + br i1 %cond, label %to_be_init, label %return + +to_be_init: ; preds = %entry + %is_init = call i32 @__cxa_guard_acquire(ptr @guard) + %cond.2 = icmp ne i32 %is_init, 0 + br i1 %cond.2, label %ctor, label %return + +ctor: ; preds = %to_be_init + invoke void @S_ctor(ptr @g_getS) + to label %continue unwind label %landing_pad + +continue: ; preds = %ctor + call void @__cxa_guard_release(ptr @guard) + br label %return + +return: ; preds = %continue, %to_be_init, %entry + ret ptr @g_getS + +landing_pad: ; preds = %ctor + %lp = landingpad { ptr, i32 } cleanup + call void @__cxa_guard_abort(ptr @guard) + resume { ptr, i32 } %lp +} + +define ptr @getS_or_getX(i1 %cond) { +entry: + %result = select i1 %cond, ptr @g_getS, ptr @g_getX + ret ptr %result +} + +define weak ptr @getS_weak_function() { +entry: + ret ptr @g_getS +} + +define linkonce_odr ptr @getS_linkonce_odr_function() { +entry: + ret ptr @g_getS +} + +; May revert propagation. +define i32 @caller_1() { +; CHECK-LABEL: @caller_1( +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS() +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr [[GETS_PTR]]) +; CHECK-NEXT: ret i32 [[GETI]] +; + %getS_ptr = call ptr @getS() + %getI = call i32 @S_getI(ptr @g_getS) + ret i32 %getI +} + +; Cannot revert propagation due to use appearing in a different basic block. +define i32 @caller_2() { +; CHECK-LABEL: @caller_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS() +; CHECK-NEXT: br label [[USE:%.*]] +; CHECK: use: +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr [[GETS_PTR]]) +; CHECK-NEXT: ret i32 [[GETI]] +; +entry: + %getS_ptr = call ptr @getS() + br label %use + +use: ; preds = %entry + %getI = call i32 @S_getI(ptr %getS_ptr) + ret i32 %getI +} + +; Cannot revert propagation due to use appearing before the call.
+define i32 @caller_3() { +; CHECK-LABEL: @caller_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr @g_getS) +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS() +; CHECK-NEXT: ret i32 [[GETI]] +; +entry: + %getI = call i32 @S_getI(ptr @g_getS) + %getS_ptr = call ptr @getS() + ret i32 %getI +} + +; Cannot revert propagation due to non-uniform returned constant. +define i32 @caller_4(i1 %cond) { +; CHECK-LABEL: @caller_4( +; CHECK-NEXT: [[GETS_OR_GETX_PTR:%.*]] = call ptr @getS_or_getX(i1 [[COND:%.*]]) +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr @g_getS) +; CHECK-NEXT: ret i32 [[GETI]] +; + %getS_or_getX_ptr = call ptr @getS_or_getX(i1 %cond) + %getI = call i32 @S_getI(ptr @g_getS) + ret i32 %getI +} + +; Cannot revert propagation due to weak-linkage callee. +define i32 @caller_5() { +; CHECK-LABEL: @caller_5( +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS_weak_function() +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr @g_getS) +; CHECK-NEXT: ret i32 [[GETI]] +; + %getS_ptr = call ptr @getS_weak_function() + %getI = call i32 @S_getI(ptr @g_getS) + ret i32 %getI +} + +; Cannot revert propagation due to callee being an external declaration with no visible definition. +define i32 @caller_6() { +; CHECK-LABEL: @caller_6( +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS_dec() +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr @g_getS) +; CHECK-NEXT: ret i32 [[GETI]] +; + %getS_ptr = call ptr @getS_dec() + %getI = call i32 @S_getI(ptr @g_getS) + ret i32 %getI +} + +; Cannot revert propagation due to callee with DLLImport storage class. +define i32 @caller_7() { +; CHECK-LABEL: @caller_7( +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS_dllimport_function() +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr @g_getS) +; CHECK-NEXT: ret i32 [[GETI]] +; + %getS_ptr = call ptr @getS_dllimport_function() + %getI = call i32 @S_getI(ptr @g_getS) + ret i32 %getI +} + +; Cannot revert propagation due to callee whose definition may be overridden. +define i32 @caller_8() { +; CHECK-LABEL: @caller_8( +; CHECK-NEXT: [[GETS_PTR:%.*]] = call ptr @getS_linkonce_odr_function() +; CHECK-NEXT: [[GETI:%.*]] = call i32 @S_getI(ptr @g_getS) +; CHECK-NEXT: ret i32 [[GETI]] +; + %getS_ptr = call ptr @getS_linkonce_odr_function() + %getI = call i32 @S_getI(ptr @g_getS) + ret i32 %getI +} + +declare i32 @__cxa_guard_acquire(ptr) +declare void @S_ctor(ptr) +declare i32 @S_getI(ptr) +declare void @__cxa_guard_abort(ptr) +declare void @__cxa_guard_release(ptr) +declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll index 99cdb6bc760b46..b2bc1abeaba568 100644 --- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll +++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='instcombine' -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index ba29f4290a9fa4..3b1fa3a97d9cd7 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -2721,11 +2721,11 @@ define void @phi_op_in_loop(i1 %c, i32 %x) { ; CHECK: loop: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X:%.*]], [[IF]] ], [ 0, [[LOOP]] ] -; CHECK-NEXT: [[AND:%.*]] = and i32 [[PHI]], 1 -; CHECK-NEXT: call void @use(i32 [[AND]]) +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[IF]] ], [ 0, [[LOOP]] ] +; CHECK-NEXT: call void @use(i32 [[PHI]]) ; CHECK-NEXT: br label [[LOOP]] ; br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 08d05a1e2db69f..c51ba0e5b6ea4e 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -5504,11 +5504,11 @@ define i32 @PR32419(i32 %a, i16 %b) { ; INTERLEAVE-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] ; INTERLEAVE: for.cond: ; INTERLEAVE-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]] +; INTERLEAVE-NEXT: [[TMP50:%.*]] = sext i16 [[VAR3]] to i32 ; INTERLEAVE-NEXT: br label [[FOR_INC]] ; INTERLEAVE: for.inc: -; INTERLEAVE-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] -; INTERLEAVE-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32 -; INTERLEAVE-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] +; INTERLEAVE-NEXT: [[VAR4:%.*]] = phi i32 [ [[TMP50]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] +; INTERLEAVE-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR4]] ; INTERLEAVE-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 ; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 ; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] diff --git a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll index 35d5ceeb91950f..871615dbd62852 100644 --- a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll +++ b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll @@ -2,7 +2,7 @@ ; RUN: opt -O1 -S < %s | FileCheck %s define i32 @testa(i32 %mul) { -; CHECK-LABEL: define range(i32 -65536, 65536) i32 @testa( +; CHECK-LABEL: define range(i32 -65536, 32768) i32 @testa( ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15 ; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767) @@ -16,7 +16,7 @@ define i32 @testa(i32 %mul) { } define i32 @testb(i32 %mul) { -; CHECK-LABEL: define range(i32 -16777216, 
16777216) i32 @testb( +; CHECK-LABEL: define range(i32 -128, 128) i32 @testb( ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[SHR102:%.*]] = ashr i32 [[MUL]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128) diff --git a/llvm/test/Transforms/SCCP/exact-flags.ll b/llvm/test/Transforms/SCCP/exact-flags.ll index a5e3bf111bbd9d..f860ddb6fe9cfb 100644 --- a/llvm/test/Transforms/SCCP/exact-flags.ll +++ b/llvm/test/Transforms/SCCP/exact-flags.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=sccp < %s -S | FileCheck %s define i8 @ashr_to_lshr(i8 %x, i8 %y) { -; CHECK-LABEL: define i8 @ashr_to_lshr( +; CHECK-LABEL: define range(i8 0, -128) i8 @ashr_to_lshr( ; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { ; CHECK-NEXT: [[P:%.*]] = and i8 [[X]], 127 ; CHECK-NEXT: [[R:%.*]] = lshr exact i8 [[P]], [[Y]] @@ -14,7 +14,7 @@ define i8 @ashr_to_lshr(i8 %x, i8 %y) { } define i8 @sdiv_to_udiv(i8 %x, i8 %y) { -; CHECK-LABEL: define i8 @sdiv_to_udiv( +; CHECK-LABEL: define range(i8 0, -128) i8 @sdiv_to_udiv( ; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { ; CHECK-NEXT: [[X1:%.*]] = and i8 [[X]], 127 ; CHECK-NEXT: [[Y1:%.*]] = and i8 [[Y]], 127 diff --git a/llvm/test/Transforms/SCCP/phis.ll b/llvm/test/Transforms/SCCP/phis.ll index 9264a6eaefb85d..dae843ca955955 100644 --- a/llvm/test/Transforms/SCCP/phis.ll +++ b/llvm/test/Transforms/SCCP/phis.ll @@ -100,7 +100,7 @@ end: } define <2 x i16> @phi_vector_merge1(i1 %c, <2 x i8> %a) { -; CHECK-LABEL: define <2 x i16> @phi_vector_merge1( +; CHECK-LABEL: define range(i16 2, 259) <2 x i16> @phi_vector_merge1( ; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> @@ -126,7 +126,7 @@ join: } define <2 x i16> @phi_vector_merge2(i1 %c, <2 x i8> %a) { -; CHECK-LABEL: define <2 x i16> @phi_vector_merge2( +; CHECK-LABEL: define range(i16 2, 259) <2 x i16> @phi_vector_merge2( ; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16> diff --git a/llvm/test/Transforms/SCCP/pointer-nonnull.ll b/llvm/test/Transforms/SCCP/pointer-nonnull.ll index 08d4a76345bb63..c3a6a762e31744 100644 --- a/llvm/test/Transforms/SCCP/pointer-nonnull.ll +++ b/llvm/test/Transforms/SCCP/pointer-nonnull.ll @@ -232,13 +232,9 @@ define i1 @ip_test_nonnull_caller(ptr %p) { } define ptr @ret_nonnull_pointer(ptr nonnull %p) { -; SCCP-LABEL: define ptr @ret_nonnull_pointer( -; SCCP-SAME: ptr nonnull [[P:%.*]]) { -; SCCP-NEXT: ret ptr [[P]] -; -; IPSCCP-LABEL: define nonnull ptr @ret_nonnull_pointer( -; IPSCCP-SAME: ptr nonnull [[P:%.*]]) { -; IPSCCP-NEXT: ret ptr [[P]] +; CHECK-LABEL: define nonnull ptr @ret_nonnull_pointer( +; CHECK-SAME: ptr nonnull [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] ; ret ptr %p } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll new file mode 100644 index 00000000000000..fd56d1632fa000 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-scmp.ll @@ -0,0 +1,646 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX +; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX + +@a64 = common global [8 x i64] zeroinitializer, align 64 +@b64 = common global [8 x i64] zeroinitializer, align 64 +@c64 = common global [8 x i64] zeroinitializer, align 64 +@a32 = common global [16 x i32] zeroinitializer, align 64 +@b32 = common global [16 x i32] zeroinitializer, align 64 +@c32 = common global [16 x i32] zeroinitializer, align 64 +@a16 = common global [32 x i16] zeroinitializer, align 64 +@b16 = common global [32 x i16] zeroinitializer, align 64 +@c16 = common global [32 x i16] zeroinitializer, align 64 +@a8 = common global [64 x i8] zeroinitializer, align 64 +@b8 = common global [64 x i8] zeroinitializer, align 64 +@c8 = common global [64 x i8] zeroinitializer, align 64 + +define void @scmp_v8i64() { +; AVX-LABEL: @scmp_v8i64( +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8 +; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8 +; AVX-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]]) +; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8 +; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 +; AVX-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]]) +; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 +; AVX-NEXT: ret void +; +; AVX512-LABEL: @scmp_v8i64( +; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8 +; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8 +; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]]) +; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8 +; AVX512-NEXT: ret void +; + %a0 = load i64, ptr @a64, align 8 + %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8 + %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8 + %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8 + %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8 + %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8 + %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8 + %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8 + %b0 = load i64, ptr @b64, align 8 + %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8 + %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8 + %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8 + %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8 + %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8 + %b6 = load i64, ptr getelementptr 
inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8 + %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8 + %r0 = call i64 @llvm.scmp.i64.i64(i64 %a0, i64 %b0) + %r1 = call i64 @llvm.scmp.i64.i64(i64 %a1, i64 %b1) + %r2 = call i64 @llvm.scmp.i64.i64(i64 %a2, i64 %b2) + %r3 = call i64 @llvm.scmp.i64.i64(i64 %a3, i64 %b3) + %r4 = call i64 @llvm.scmp.i64.i64(i64 %a4, i64 %b4) + %r5 = call i64 @llvm.scmp.i64.i64(i64 %a5, i64 %b5) + %r6 = call i64 @llvm.scmp.i64.i64(i64 %a6, i64 %b6) + %r7 = call i64 @llvm.scmp.i64.i64(i64 %a7, i64 %b7) + store i64 %r0, ptr @c64, align 8 + store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8 + store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8 + store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8 + store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8 + store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8 + store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8 + store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8 + ret void +} + +define void @scmp_v16i32() { +; SSE-LABEL: @scmp_v16i32( +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4 +; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4 +; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4 +; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4 +; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]]) +; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4 +; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) +; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 +; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 +; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) +; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 +; SSE-NEXT: ret void +; +; AVX-LABEL: @scmp_v16i32( +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4 +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4 +; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) +; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4 +; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4 +; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4 +; AVX-NEXT: 
[[TMP6:%.*]] = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]]) +; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4 +; AVX-NEXT: ret void +; +; AVX512-LABEL: @scmp_v16i32( +; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4 +; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4 +; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]]) +; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4 +; AVX512-NEXT: ret void +; + %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4 + %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4 + %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4 + %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4 + %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4 + %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4 + %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4 + %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4 + %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4 + %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4 + %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4 + %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4 + %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4 + %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4 + %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4 + %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4 + %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4 + %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4 + %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4 + %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4 + %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4 + %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4 + %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4 + %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4 + %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4 + %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4 + %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4 + %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4 + %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4 + %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4 + %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4 + %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4 + 
%r0 = call i32 @llvm.scmp.i32.i32(i32 %a0 , i32 %b0 ) + %r1 = call i32 @llvm.scmp.i32.i32(i32 %a1 , i32 %b1 ) + %r2 = call i32 @llvm.scmp.i32.i32(i32 %a2 , i32 %b2 ) + %r3 = call i32 @llvm.scmp.i32.i32(i32 %a3 , i32 %b3 ) + %r4 = call i32 @llvm.scmp.i32.i32(i32 %a4 , i32 %b4 ) + %r5 = call i32 @llvm.scmp.i32.i32(i32 %a5 , i32 %b5 ) + %r6 = call i32 @llvm.scmp.i32.i32(i32 %a6 , i32 %b6 ) + %r7 = call i32 @llvm.scmp.i32.i32(i32 %a7 , i32 %b7 ) + %r8 = call i32 @llvm.scmp.i32.i32(i32 %a8 , i32 %b8 ) + %r9 = call i32 @llvm.scmp.i32.i32(i32 %a9 , i32 %b9 ) + %r10 = call i32 @llvm.scmp.i32.i32(i32 %a10, i32 %b10) + %r11 = call i32 @llvm.scmp.i32.i32(i32 %a11, i32 %b11) + %r12 = call i32 @llvm.scmp.i32.i32(i32 %a12, i32 %b12) + %r13 = call i32 @llvm.scmp.i32.i32(i32 %a13, i32 %b13) + %r14 = call i32 @llvm.scmp.i32.i32(i32 %a14, i32 %b14) + %r15 = call i32 @llvm.scmp.i32.i32(i32 %a15, i32 %b15) + store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4 + store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4 + store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4 + store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4 + store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4 + store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4 + store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4 + store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4 + store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4 + store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4 + store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4 + store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4 + store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4 + store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4 + store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4 + store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4 + ret void +} + +define void @scmp_v32i16() { +; SSE-LABEL: @scmp_v32i16( +; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2 +; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2 +; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2 +; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2 +; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2 +; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]]) +; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2 +; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2 +; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2 +; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]]) +; SSE-NEXT: store <8 x 
i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2 +; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2 +; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2 +; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]]) +; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2 +; SSE-NEXT: ret void +; +; AVX-LABEL: @scmp_v32i16( +; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2 +; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2 +; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2 +; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2 +; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2 +; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]]) +; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2 +; AVX-NEXT: ret void +; +; AVX512-LABEL: @scmp_v32i16( +; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2 +; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2 +; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]]) +; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2 +; AVX512-NEXT: ret void +; + %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2 + %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2 + %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2 + %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2 + %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2 + %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2 + %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2 + %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2 + %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2 + %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2 + %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2 + %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2 + %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2 + %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2 + %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2 + %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2 + %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2 + %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2 + %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2 + %a19 = load i16, ptr getelementptr inbounds 
([32 x i16], ptr @a16, i32 0, i64 19), align 2 + %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2 + %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2 + %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2 + %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2 + %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2 + %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2 + %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2 + %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2 + %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2 + %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2 + %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2 + %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2 + %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2 + %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2 + %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2 + %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2 + %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2 + %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2 + %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2 + %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2 + %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2 + %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2 + %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2 + %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2 + %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2 + %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2 + %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2 + %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2 + %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2 + %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2 + %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2 + %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2 + %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2 + %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2 + %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2 + %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2 + %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2 + %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, 
i64 25), align 2
+ %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+ %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+ %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+ %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+ %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+ %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
+ %r0 = call i16 @llvm.scmp.i16.i16(i16 %a0 , i16 %b0 )
+ %r1 = call i16 @llvm.scmp.i16.i16(i16 %a1 , i16 %b1 )
+ %r2 = call i16 @llvm.scmp.i16.i16(i16 %a2 , i16 %b2 )
+ %r3 = call i16 @llvm.scmp.i16.i16(i16 %a3 , i16 %b3 )
+ %r4 = call i16 @llvm.scmp.i16.i16(i16 %a4 , i16 %b4 )
+ %r5 = call i16 @llvm.scmp.i16.i16(i16 %a5 , i16 %b5 )
+ %r6 = call i16 @llvm.scmp.i16.i16(i16 %a6 , i16 %b6 )
+ %r7 = call i16 @llvm.scmp.i16.i16(i16 %a7 , i16 %b7 )
+ %r8 = call i16 @llvm.scmp.i16.i16(i16 %a8 , i16 %b8 )
+ %r9 = call i16 @llvm.scmp.i16.i16(i16 %a9 , i16 %b9 )
+ %r10 = call i16 @llvm.scmp.i16.i16(i16 %a10, i16 %b10)
+ %r11 = call i16 @llvm.scmp.i16.i16(i16 %a11, i16 %b11)
+ %r12 = call i16 @llvm.scmp.i16.i16(i16 %a12, i16 %b12)
+ %r13 = call i16 @llvm.scmp.i16.i16(i16 %a13, i16 %b13)
+ %r14 = call i16 @llvm.scmp.i16.i16(i16 %a14, i16 %b14)
+ %r15 = call i16 @llvm.scmp.i16.i16(i16 %a15, i16 %b15)
+ %r16 = call i16 @llvm.scmp.i16.i16(i16 %a16, i16 %b16)
+ %r17 = call i16 @llvm.scmp.i16.i16(i16 %a17, i16 %b17)
+ %r18 = call i16 @llvm.scmp.i16.i16(i16 %a18, i16 %b18)
+ %r19 = call i16 @llvm.scmp.i16.i16(i16 %a19, i16 %b19)
+ %r20 = call i16 @llvm.scmp.i16.i16(i16 %a20, i16 %b20)
+ %r21 = call i16 @llvm.scmp.i16.i16(i16 %a21, i16 %b21)
+ %r22 = call i16 @llvm.scmp.i16.i16(i16 %a22, i16 %b22)
+ %r23 = call i16 @llvm.scmp.i16.i16(i16 %a23, i16 %b23)
+ %r24 = call i16 @llvm.scmp.i16.i16(i16 %a24, i16 %b24)
+ %r25 = call i16 @llvm.scmp.i16.i16(i16 %a25, i16 %b25)
+ %r26 = call i16 @llvm.scmp.i16.i16(i16 %a26, i16 %b26)
+ %r27 = call i16 @llvm.scmp.i16.i16(i16 %a27, i16 %b27)
+ %r28 = call i16 @llvm.scmp.i16.i16(i16 %a28, i16 %b28)
+ %r29 = call i16 @llvm.scmp.i16.i16(i16 %a29, i16 %b29)
+ %r30 = call i16 @llvm.scmp.i16.i16(i16 %a30, i16 %b30)
+ %r31 = call i16 @llvm.scmp.i16.i16(i16 %a31, i16 %b31)
+ store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+ store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+ store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+ store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+ store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+ store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+ store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+ store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+ store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+ store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+ store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+ store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+ store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+ store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+ store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+ store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+ store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+ store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+ store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+ store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+ store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+ store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+ store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+ store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+ store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+ store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+ store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+ store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+ store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+ store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+ store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+ store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
+ ret void
+}
+
+define void @scmp_v64i8() {
+; SSE-LABEL: @scmp_v64i8(
+; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
+; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
+; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
+; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
+; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @scmp_v64i8(
+; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
+; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
+; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
+; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @scmp_v64i8(
+; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
+; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
+; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
+; AVX512-NEXT: ret void
+;
+ %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+ %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+ %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+ %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+ %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+ %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+ %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+ %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+ %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+ %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+ %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+ %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+ %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+ %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+ %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+ %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+ %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+ %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+ %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+ %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+ %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+ %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+ %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+ %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+ %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+ %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+ %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+ %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+ %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+ %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+ %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+ %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+ %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+ %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+ %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+ %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+ %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+ %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+ %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+ %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+ %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+ %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+ %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+ %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+ %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+ %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+ %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+ %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+ %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+ %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+ %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+ %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+ %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+ %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+ %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+ %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+ %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+ %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+ %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+ %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+ %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+ %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+ %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+ %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+ %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+ %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+ %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+ %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+ %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+ %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+ %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+ %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+ %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+ %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+ %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+ %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+ %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+ %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+ %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+ %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+ %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+ %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+ %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+ %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+ %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+ %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+ %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+ %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+ %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+ %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+ %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+ %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+ %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+ %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+ %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+ %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+ %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+ %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+ %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+ %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+ %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 36), align 1
+ %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1
+ %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1
+ %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1
+ %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1
+ %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1
+ %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1
+ %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1
+ %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1
+ %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1
+ %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1
+ %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1
+ %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+ %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1
+ %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1
+ %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1
+ %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1
+ %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1
+ %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1
+ %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1
+ %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1
+ %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1
+ %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1
+ %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1
+ %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1
+ %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1
+ %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1
+ %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1
+ %r0 = call i8 @llvm.scmp.i8.i8(i8 %a0 , i8 %b0 )
+ %r1 = call i8 @llvm.scmp.i8.i8(i8 %a1 , i8 %b1 )
+ %r2 = call i8 @llvm.scmp.i8.i8(i8 %a2 , i8 %b2 )
+ %r3 = call i8 @llvm.scmp.i8.i8(i8 %a3 , i8 %b3 )
+ %r4 = call i8 @llvm.scmp.i8.i8(i8 %a4 , i8 %b4 )
+ %r5 = call i8 @llvm.scmp.i8.i8(i8 %a5 , i8 %b5 )
+ %r6 = call i8 @llvm.scmp.i8.i8(i8 %a6 , i8 %b6 )
+ %r7 = call i8 @llvm.scmp.i8.i8(i8 %a7 , i8 %b7 )
+ %r8 = call i8 @llvm.scmp.i8.i8(i8 %a8 , i8 %b8 )
+ %r9 = call i8 @llvm.scmp.i8.i8(i8 %a9 , i8 %b9 )
+ %r10 = call i8 @llvm.scmp.i8.i8(i8 %a10, i8 %b10)
+ %r11 = call i8 @llvm.scmp.i8.i8(i8 %a11, i8 %b11)
+ %r12 = call i8 @llvm.scmp.i8.i8(i8 %a12, i8 %b12)
+ %r13 = call i8 @llvm.scmp.i8.i8(i8 %a13, i8 %b13)
+ %r14 = call i8 @llvm.scmp.i8.i8(i8 %a14, i8 %b14)
+ %r15 = call i8 @llvm.scmp.i8.i8(i8 %a15, i8 %b15)
+ %r16 = call i8 @llvm.scmp.i8.i8(i8 %a16, i8 %b16)
+ %r17 = call i8 @llvm.scmp.i8.i8(i8 %a17, i8 %b17)
+ %r18 = call i8 @llvm.scmp.i8.i8(i8 %a18, i8 %b18)
+ %r19 = call i8 @llvm.scmp.i8.i8(i8 %a19, i8 %b19)
+ %r20 = call i8 @llvm.scmp.i8.i8(i8 %a20, i8 %b20)
+ %r21 = call i8 @llvm.scmp.i8.i8(i8 %a21, i8 %b21)
+ %r22 = call i8 @llvm.scmp.i8.i8(i8 %a22, i8 %b22)
+ %r23 = call i8 @llvm.scmp.i8.i8(i8 %a23, i8 %b23)
+ %r24 = call i8 @llvm.scmp.i8.i8(i8 %a24, i8 %b24)
+ %r25 = call i8 @llvm.scmp.i8.i8(i8 %a25, i8 %b25)
+ %r26 = call i8 @llvm.scmp.i8.i8(i8 %a26, i8 %b26)
+ %r27 = call i8 @llvm.scmp.i8.i8(i8 %a27, i8 %b27)
+ %r28 = call i8 @llvm.scmp.i8.i8(i8 %a28, i8 %b28)
+ %r29 = call i8 @llvm.scmp.i8.i8(i8 %a29, i8 %b29)
+ %r30 = call i8 @llvm.scmp.i8.i8(i8 %a30, i8 %b30)
+ %r31 = call i8 @llvm.scmp.i8.i8(i8 %a31, i8 %b31)
+ %r32 = call i8 @llvm.scmp.i8.i8(i8 %a32, i8 %b32)
+ %r33 = call i8 @llvm.scmp.i8.i8(i8 %a33, i8 %b33)
+ %r34 = call i8 @llvm.scmp.i8.i8(i8 %a34, i8 %b34)
+ %r35 = call i8 @llvm.scmp.i8.i8(i8 %a35, i8 %b35)
+ %r36 = call i8 @llvm.scmp.i8.i8(i8 %a36, i8 %b36)
+ %r37 = call i8 @llvm.scmp.i8.i8(i8 %a37, i8 %b37)
+ %r38 = call i8 @llvm.scmp.i8.i8(i8 %a38, i8 %b38)
+ %r39 = call i8 @llvm.scmp.i8.i8(i8 %a39, i8 %b39)
+ %r40 = call i8 @llvm.scmp.i8.i8(i8 %a40, i8 %b40)
+ %r41 = call i8 @llvm.scmp.i8.i8(i8 %a41, i8 %b41)
+ %r42 = call i8 @llvm.scmp.i8.i8(i8 %a42, i8 %b42)
+ %r43 = call i8 @llvm.scmp.i8.i8(i8 %a43, i8 %b43)
+ %r44 = call i8 @llvm.scmp.i8.i8(i8 %a44, i8 %b44)
+ %r45 = call i8 @llvm.scmp.i8.i8(i8 %a45, i8 %b45)
+ %r46 = call i8 @llvm.scmp.i8.i8(i8 %a46, i8 %b46)
+ %r47 = call i8 @llvm.scmp.i8.i8(i8 %a47, i8 %b47)
+ %r48 = call i8 @llvm.scmp.i8.i8(i8 %a48, i8 %b48)
+ %r49 = call i8 @llvm.scmp.i8.i8(i8 %a49, i8 %b49)
+ %r50 = call i8 @llvm.scmp.i8.i8(i8 %a50, i8 %b50)
+ %r51 = call i8 @llvm.scmp.i8.i8(i8 %a51, i8 %b51)
+ %r52 = call i8 @llvm.scmp.i8.i8(i8 %a52, i8 %b52)
+ %r53 = call i8 @llvm.scmp.i8.i8(i8 %a53, i8 %b53)
+ %r54 = call i8 @llvm.scmp.i8.i8(i8 %a54, i8 %b54)
+ %r55 = call i8 @llvm.scmp.i8.i8(i8 %a55, i8 %b55)
+ %r56 = call i8 @llvm.scmp.i8.i8(i8 %a56, i8 %b56)
+ %r57 = call i8 @llvm.scmp.i8.i8(i8 %a57, i8 %b57)
+ %r58 = call i8 @llvm.scmp.i8.i8(i8 %a58, i8 %b58)
+ %r59 = call i8 @llvm.scmp.i8.i8(i8 %a59, i8 %b59)
+ %r60 = call i8 @llvm.scmp.i8.i8(i8 %a60, i8 %b60)
+ %r61 = call i8 @llvm.scmp.i8.i8(i8 %a61, i8 %b61)
+ %r62 = call i8 @llvm.scmp.i8.i8(i8 %a62, i8 %b62)
+ %r63 = call i8 @llvm.scmp.i8.i8(i8 %a63, i8 %b63)
+ store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1
+ store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1
+ store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1
+ store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1
+ store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1
+ store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1
+ store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1
+ store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1
+ store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1
+ store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1
+ store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1
+ store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1
+ store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1
+ store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1
+ store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1
+ store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1
+ store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+ store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1
+ store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1
+ store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1
+ store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1
+ store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1
+ store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1
+ store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1
+ store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1
+ store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1
+ store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1
+ store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1
+ store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1
+ store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1
+ store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1
+ store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1
+ store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+ store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1
+ store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1
+ store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1
+ store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1
+ store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1
+ store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1
+ store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1
+ store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1
+ store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1
+ store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1
+ store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1
+ store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1
+ store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1
+ store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1
+ store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1
+ store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+ store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1
+ store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1
+ store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1
+ store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1
+ store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1
+ store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), align 1
+ store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+ store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+ store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+ store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+ store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+ store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+ store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+ store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+ store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll
new file mode 100644
index 00000000000000..435059c6420d86
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-ucmp.ll
@@ -0,0 +1,681 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
+
+@a64 = common global [8 x i64] zeroinitializer, align 64
+@b64 = common global [8 x i64] zeroinitializer, align 64
+@c64 = common global [8 x i64] zeroinitializer, align 64
+@a32 = common global [16 x i32] zeroinitializer, align 64
+@b32 = common global [16 x i32] zeroinitializer, align 64
+@c32 = common global [16 x i32] zeroinitializer, align 64
+@a16 = common global [32 x i16] zeroinitializer, align 64
+@b16 = common global [32 x i16] zeroinitializer, align 64
+@c16 = common global [32 x i16] zeroinitializer, align 64
+@a8 = common global [64 x i8] zeroinitializer, align 64
+@b8 = common global [64 x i8] zeroinitializer, align 64
+@c8 = common global [64 x i8] zeroinitializer, align 64
+
+define void @ucmp_v8i64() {
+; SSE-LABEL: @ucmp_v8i64(
+; SSE-NEXT: [[A0:%.*]] = load i64, ptr @a64, align 8
+; SSE-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+; SSE-NEXT: [[A2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+; SSE-NEXT: [[A3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+; SSE-NEXT: [[A4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; SSE-NEXT: [[A5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+; SSE-NEXT: [[A6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+; SSE-NEXT: [[A7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+; SSE-NEXT: [[B0:%.*]] = load i64, ptr @b64, align 8
+; SSE-NEXT: [[B1:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+; SSE-NEXT: [[B2:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+; SSE-NEXT: [[B3:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+; SSE-NEXT: [[B4:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; SSE-NEXT: [[B5:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+; SSE-NEXT: [[B6:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+; SSE-NEXT: [[B7:%.*]] = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
+; SSE-NEXT: [[R0:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A0]], i64 [[B0]])
+; SSE-NEXT: [[R1:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A1]], i64 [[B1]])
+; SSE-NEXT: [[R2:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A2]], i64 [[B2]])
+; SSE-NEXT: [[R3:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A3]], i64 [[B3]])
+; SSE-NEXT: [[R4:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A4]], i64 [[B4]])
+; SSE-NEXT: [[R5:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A5]], i64 [[B5]])
+; SSE-NEXT: [[R6:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A6]], i64 [[B6]])
+; SSE-NEXT: [[R7:%.*]] = call i64 @llvm.ucmp.i64.i64(i64 [[A7]], i64 [[B7]])
+; SSE-NEXT: store i64 [[R0]], ptr @c64, align 8
+; SSE-NEXT: store i64 [[R1]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+; SSE-NEXT: store i64 [[R2]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+; SSE-NEXT: store i64 [[R3]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+; SSE-NEXT: store i64 [[R4]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; SSE-NEXT: store i64 [[R5]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+; SSE-NEXT: store i64 [[R6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+; SSE-NEXT: store i64 [[R7]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v8i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @a64, align 8
+; AVX-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr @b64, align 8
+; AVX-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP2]])
+; AVX-NEXT: store <4 x i64> [[TMP3]], ptr @c64, align 8
+; AVX-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+; AVX-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[TMP5]])
+; AVX-NEXT: store <4 x i64> [[TMP6]], ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v8i64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @a64, align 8
+; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @b64, align 8
+; AVX512-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT: store <8 x i64> [[TMP3]], ptr @c64, align 8
+; AVX512-NEXT: ret void
+;
+ %a0 = load i64, ptr @a64, align 8
+ %a1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 1), align 8
+ %a2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 2), align 8
+ %a3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 3), align 8
+ %a4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 4), align 8
+ %a5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 5), align 8
+ %a6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 6), align 8
+ %a7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @a64, i32 0, i64 7), align 8
+ %b0 = load i64, ptr @b64, align 8
+ %b1 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 1), align 8
+ %b2 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 2), align 8
+ %b3 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 3), align 8
+ %b4 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 4), align 8
+ %b5 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 5), align 8
+ %b6 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 6), align 8
+ %b7 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @b64, i32 0, i64 7), align 8
+ %r0 = call i64 @llvm.ucmp.i64.i64(i64 %a0, i64 %b0)
+ %r1 = call i64 @llvm.ucmp.i64.i64(i64 %a1, i64 %b1)
+ %r2 = call i64 @llvm.ucmp.i64.i64(i64 %a2, i64 %b2)
+ %r3 = call i64 @llvm.ucmp.i64.i64(i64 %a3, i64 %b3)
+ %r4 = call i64 @llvm.ucmp.i64.i64(i64 %a4, i64 %b4)
+ %r5 = call i64 @llvm.ucmp.i64.i64(i64 %a5, i64 %b5)
+ %r6 = call i64 @llvm.ucmp.i64.i64(i64 %a6, i64 %b6)
+ %r7 = call i64 @llvm.ucmp.i64.i64(i64 %a7, i64 %b7)
+ store i64 %r0, ptr @c64, align 8
+ store i64 %r1, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 1), align 8
+ store i64 %r2, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 2), align 8
+ store i64 %r3, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 3), align 8
+ store i64 %r4, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 4), align 8
+ store i64 %r5, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 5), align 8
+ store i64 %r6, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 6), align 8
+ store i64 %r7, ptr getelementptr inbounds ([8 x i64], ptr @c64, i32 0, i64 7), align 8
+ ret void
+}
+
+define void @ucmp_v16i32() {
+; SSE-LABEL: @ucmp_v16i32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @b32, align 4
+; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; SSE-NEXT: store <4 x i32> [[TMP3]], ptr @c32, align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
+; SSE-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
+; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]])
+; SSE-NEXT: store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+; SSE-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
+; SSE-NEXT: store <4 x i32> [[TMP12]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v16i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
+; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr @b32, align 4
+; AVX-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; AVX-NEXT: store <8 x i32> [[TMP3]], ptr @c32, align 4
+; AVX-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
+; AVX-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8), align 4
+; AVX-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> [[TMP4]], <8 x i32> [[TMP5]])
+; AVX-NEXT: store <8 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v16i32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
+; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @b32, align 4
+; AVX512-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
+; AVX512-NEXT: store <16 x i32> [[TMP3]], ptr @c32, align 4
+; AVX512-NEXT: ret void
+;
+ %a0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 0 ), align 4
+ %a1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 1 ), align 4
+ %a2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 2 ), align 4
+ %a3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 3 ), align 4
+ %a4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4 ), align 4
+ %a5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 5 ), align 4
+ %a6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 6 ), align 4
+ %a7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 7 ), align 4
+ %a8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8 ), align 4
+ %a9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 9 ), align 4
+ %a10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 10), align 4
+ %a11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 11), align 4
+ %a12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
+ %a13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 13), align 4
+ %a14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 14), align 4
+ %a15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 15), align 4
+ %b0 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 0 ), align 4
+ %b1 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 1 ), align 4
+ %b2 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 2 ), align 4
+ %b3 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 3 ), align 4
+ %b4 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 4 ), align 4
+ %b5 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 5 ), align 4
+ %b6 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 6 ), align 4
+ %b7 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 7 ), align 4
+ %b8 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 8 ), align 4
+ %b9 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 9 ), align 4
+ %b10 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 10), align 4
+ %b11 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 11), align 4
+ %b12 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 12), align 4
+ %b13 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 13), align 4
+ %b14 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 14), align 4
+ %b15 = load i32, ptr getelementptr inbounds ([16 x i32], ptr @b32, i32 0, i64 15), align 4
+ %r0 = call i32 @llvm.ucmp.i32.i32(i32 %a0 , i32 %b0 )
+ %r1 = call i32 @llvm.ucmp.i32.i32(i32 %a1 , i32 %b1 )
+ %r2 = call i32 @llvm.ucmp.i32.i32(i32 %a2 , i32 %b2 )
+ %r3 = call i32 @llvm.ucmp.i32.i32(i32 %a3 , i32 %b3 )
+ %r4 = call i32 @llvm.ucmp.i32.i32(i32 %a4 , i32 %b4 )
+ %r5 = call i32 @llvm.ucmp.i32.i32(i32 %a5 , i32 %b5 )
+ %r6 = call i32 @llvm.ucmp.i32.i32(i32 %a6 , i32 %b6 )
+ %r7 = call i32 @llvm.ucmp.i32.i32(i32 %a7 , i32 %b7 )
+ %r8 = call i32 @llvm.ucmp.i32.i32(i32 %a8 , i32 %b8 )
+ %r9 = call i32 @llvm.ucmp.i32.i32(i32 %a9 , i32 %b9 )
+ %r10 = call i32 @llvm.ucmp.i32.i32(i32 %a10, i32 %b10)
+ %r11 = call i32 @llvm.ucmp.i32.i32(i32 %a11, i32 %b11)
+ %r12 = call i32 @llvm.ucmp.i32.i32(i32 %a12, i32 %b12)
+ %r13 = call i32 @llvm.ucmp.i32.i32(i32 %a13, i32 %b13)
+ %r14 = call i32 @llvm.ucmp.i32.i32(i32 %a14, i32 %b14)
+ %r15 = call i32 @llvm.ucmp.i32.i32(i32 %a15, i32 %b15)
+ store i32 %r0 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 0 ), align 4
+ store i32 %r1 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 1 ), align 4
+ store i32 %r2 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 2 ), align 4
+ store i32 %r3 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 3 ), align 4
+ store i32 %r4 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4 ), align 4
+ store i32 %r5 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 5 ), align 4
+ store i32 %r6 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 6 ), align 4
+ store i32 %r7 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 7 ), align 4
+ store i32 %r8 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8 ), align 4
+ store i32 %r9 , ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 9 ), align 4
+ store i32 %r10, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 10), align 4
+ store i32 %r11, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 11), align 4
+ store i32 %r12, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
+ store i32 %r13, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 13), align 4
+ store i32 %r14, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 14), align 4
+ store i32 %r15, ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 15), align 4
+ ret void
+}
+
+define void @ucmp_v32i16() {
+; SSE-LABEL: @ucmp_v32i16(
+; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @a16, align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @b16, align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; SSE-NEXT: store <8 x i16> [[TMP3]], ptr @c16, align 2
+; SSE-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]])
+; SSE-NEXT: store <8 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8), align 2
+; SSE-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]])
+; SSE-NEXT: store <8 x i16> [[TMP9]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; SSE-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+; SSE-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+; SSE-NEXT: [[TMP12:%.*]] = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]])
+; SSE-NEXT: store <8 x i16> [[TMP12]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v32i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @a16, align 2
+; AVX-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @b16, align 2
+; AVX-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; AVX-NEXT: store <16 x i16> [[TMP3]], ptr @c16, align 2
+; AVX-NEXT: [[TMP4:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+; AVX-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+; AVX-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> [[TMP4]], <16 x i16> [[TMP5]])
+; AVX-NEXT: store <16 x i16> [[TMP6]], ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v32i16(
+; AVX512-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @a16, align 2
+; AVX512-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr @b16, align 2
+; AVX512-NEXT: [[TMP3:%.*]] = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
+; AVX512-NEXT: store <32 x i16> [[TMP3]], ptr @c16, align 2
+; AVX512-NEXT: ret void
+;
+ %a0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 0 ), align 2
+ %a1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 1 ), align 2
+ %a2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 2 ), align 2
+ %a3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 3 ), align 2
+ %a4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 4 ), align 2
+ %a5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 5 ), align 2
+ %a6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 6 ), align 2
+ %a7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 7 ), align 2
+ %a8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 8 ), align 2
+ %a9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 9 ), align 2
+ %a10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 10), align 2
+ %a11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 11), align 2
+ %a12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 12), align 2
+ %a13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 13), align 2
+ %a14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 14), align 2
+ %a15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 15), align 2
+ %a16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 16), align 2
+ %a17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 17), align 2
+ %a18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 18), align 2
+ %a19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 19), align 2
+ %a20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 20), align 2
+ %a21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 21), align 2
+ %a22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 22), align 2
+ %a23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 23), align 2
+ %a24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 24), align 2
+ %a25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 25), align 2
+ %a26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 26), align 2
+ %a27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 27), align 2
+ %a28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 28), align 2
+ %a29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 29), align 2
+ %a30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 30), align 2
+ %a31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @a16, i32 0, i64 31), align 2
+ %b0 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 0 ), align 2
+ %b1 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 1 ), align 2
+ %b2 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 2 ), align 2
+ %b3 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 3 ), align 2
+ %b4 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 4 ), align 2
+ %b5 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 5 ), align 2
+ %b6 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 6 ), align 2
+ %b7 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 7 ), align 2
+ %b8 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 8 ), align 2
+ %b9 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 9 ), align 2
+ %b10 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 10), align 2
+ %b11 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 11), align 2
+ %b12 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 12), align 2
+ %b13 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 13), align 2
+ %b14 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 14), align 2
+ %b15 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 15), align 2
+ %b16 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 16), align 2
+ %b17 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 17), align 2
+ %b18 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 18), align 2
+ %b19 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 19), align 2
+ %b20 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 20), align 2
+ %b21 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 21), align 2
+ %b22 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 22), align 2
+ %b23 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 23), align 2
+ %b24 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 24), align 2
+ %b25 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 25), align 2
+ %b26 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 26), align 2
+ %b27 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 27), align 2
+ %b28 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 28), align 2
+ %b29 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 29), align 2
+ %b30 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 30), align 2
+ %b31 = load i16, ptr getelementptr inbounds ([32 x i16], ptr @b16, i32 0, i64 31), align 2
+ %r0 = call i16 @llvm.ucmp.i16.i16(i16 %a0 , i16 %b0 )
+ %r1 = call i16 @llvm.ucmp.i16.i16(i16 %a1 , i16 %b1 )
+ %r2 = call i16 @llvm.ucmp.i16.i16(i16 %a2 , i16 %b2 )
+ %r3 = call i16 @llvm.ucmp.i16.i16(i16 %a3 , i16 %b3 )
+ %r4 = call i16 @llvm.ucmp.i16.i16(i16 %a4 , i16 %b4 )
+ %r5 = call i16 @llvm.ucmp.i16.i16(i16 %a5 , i16 %b5 )
+ %r6 = call i16 @llvm.ucmp.i16.i16(i16 %a6 , i16 %b6 )
+ %r7 = call i16 @llvm.ucmp.i16.i16(i16 %a7 , i16 %b7 )
+ %r8 = call i16 @llvm.ucmp.i16.i16(i16 %a8 , i16 %b8 )
+ %r9 = call i16 @llvm.ucmp.i16.i16(i16 %a9 , i16 %b9 )
+ %r10 = call i16 @llvm.ucmp.i16.i16(i16 %a10, i16 %b10)
+ %r11 = call i16 @llvm.ucmp.i16.i16(i16 %a11, i16 %b11)
+ %r12 = call i16 @llvm.ucmp.i16.i16(i16 %a12, i16 %b12)
+ %r13 = call i16 @llvm.ucmp.i16.i16(i16 %a13, i16 %b13)
+ %r14 = call i16 @llvm.ucmp.i16.i16(i16 %a14, i16 %b14)
+ %r15 = call i16 @llvm.ucmp.i16.i16(i16 %a15, i16 %b15)
+ %r16 = call i16 @llvm.ucmp.i16.i16(i16 %a16, i16 %b16)
+ %r17 = call i16 @llvm.ucmp.i16.i16(i16 %a17, i16 %b17)
+ %r18 = call i16 @llvm.ucmp.i16.i16(i16 %a18, i16 %b18)
+ %r19 = call i16 @llvm.ucmp.i16.i16(i16 %a19, i16 %b19)
+ %r20 = call i16 @llvm.ucmp.i16.i16(i16 %a20, i16 %b20)
+ %r21 = call i16 @llvm.ucmp.i16.i16(i16 %a21, i16 %b21)
+ %r22 = call i16 @llvm.ucmp.i16.i16(i16 %a22, i16 %b22)
+ %r23 = call i16 @llvm.ucmp.i16.i16(i16 %a23, i16 %b23)
+ %r24 = call i16 @llvm.ucmp.i16.i16(i16 %a24, i16 %b24)
+ %r25 = call i16 @llvm.ucmp.i16.i16(i16 %a25, i16 %b25)
+ %r26 = call i16 @llvm.ucmp.i16.i16(i16 %a26, i16 %b26)
+ %r27 = call i16 @llvm.ucmp.i16.i16(i16 %a27, i16 %b27)
+ %r28 = call i16 @llvm.ucmp.i16.i16(i16 %a28, i16 %b28)
+ %r29 = call i16 @llvm.ucmp.i16.i16(i16 %a29, i16 %b29)
+ %r30 = call i16 @llvm.ucmp.i16.i16(i16 %a30, i16 %b30)
+ %r31 = call i16 @llvm.ucmp.i16.i16(i16 %a31, i16 %b31)
+ store i16 %r0 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 0 ), align 2
+ store i16 %r1 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 1 ), align 2
+ store i16 %r2 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 2 ), align 2
+ store i16 %r3 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 3 ), align 2
+ store i16 %r4 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 4 ), align 2
+ store i16 %r5 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 5 ), align 2
+ store i16 %r6 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 6 ), align 2
+ store i16 %r7 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 7 ), align 2
+ store i16 %r8 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 8 ), align 2
+ store i16 %r9 , ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 9 ), align 2
+ store i16 %r10, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 10), align 2
+ store i16 %r11, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 11), align 2
+ store i16 %r12, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 12), align 2
+ store i16 %r13, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 13), align 2
+ store i16 %r14, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 14), align 2
+ store i16 %r15, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 15), align 2
+ store i16 %r16, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 16), align 2
+ store i16 %r17, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 17), align 2
+ store i16 %r18, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 18), align 2
+ store i16 %r19, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 19), align 2
+ store i16 %r20, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 20), align 2
+ store i16 %r21, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 21), align 2
+ store i16 %r22, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 22), align 2
+ store i16 %r23, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 23), align 2
+ store i16 %r24, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 24), align 2
+ store i16 %r25, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 25), align 2
+ store i16 %r26, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 26), align 2
+ store i16 %r27, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 27), align 2
+ store i16 %r28, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 28), align 2
+ store i16 %r29, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 29), align 2
+ store i16 %r30, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 30), align 2
+ store i16 %r31, ptr getelementptr inbounds ([32 x i16], ptr @c16, i32 0, i64 31), align 2
+ ret void
+}
+
+define void @ucmp_v64i8() {
+; SSE-LABEL: @ucmp_v64i8(
+; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @a8, align 1
+; SSE-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @b8, align 1
+; SSE-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; SSE-NEXT: store <16 x i8> [[TMP3]], ptr @c8, align 1
+; SSE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
+; SSE-NEXT: store <16 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1
+; SSE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; SSE-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
+; SSE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1
+; SSE-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
+; SSE-NEXT: store <16 x i8> [[TMP9]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; SSE-NEXT: store <16 x i8> [[TMP12]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ucmp_v64i8(
+; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @a8, align 1
+; AVX-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr @b8, align 1
+; AVX-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> [[TMP1]], <32 x i8> [[TMP2]])
+; AVX-NEXT: store <32 x i8> [[TMP3]], ptr @c8, align 1
+; AVX-NEXT: [[TMP4:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+; AVX-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> [[TMP4]], <32 x i8> [[TMP5]])
+; AVX-NEXT: store <32 x i8> [[TMP6]], ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1
+; AVX-NEXT: ret void
+;
+; AVX512-LABEL: @ucmp_v64i8(
+; AVX512-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @a8, align 1
+; AVX512-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr @b8, align 1
+; AVX512-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> [[TMP1]], <64 x i8> [[TMP2]])
+; AVX512-NEXT: store <64 x i8> [[TMP3]], ptr @c8, align 1
+; AVX512-NEXT: ret void
+;
+ %a0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 0 ), align 1
+ %a1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 1 ), align 1
+ %a2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 2 ), align 1
+ %a3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 3 ), align 1
+ %a4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 4 ), align 1
+ %a5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 5 ), align 1
+ %a6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 6 ), align 1
+ %a7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 7 ), align 1
+ %a8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 8 ), align 1
+ %a9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 9 ), align 1
+ %a10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 10), align 1
+ %a11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 11), align 1
+ %a12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 12), align 1
+ %a13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 13), align 1
+ %a14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 14), align 1
+ %a15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 15), align 1
+ %a16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 16), align 1
+ %a17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 17), align 1
+ %a18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 18), align 1
+ %a19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 19), align 1
+ %a20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 20), align 1
+ %a21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 21), align 1
+ %a22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 22), align 1
+ %a23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 23), align 1
+ %a24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 24), align 1
+ %a25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 25), align 1
+ %a26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 26), align 1
+ %a27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 27), align 1
+ %a28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 28), align 1
+ %a29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 29), align 1
+ %a30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 30), align 1
+ %a31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 31), align 1
+ %a32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 32), align 1
+ %a33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 33), align 1
+ %a34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 34), align 1
+ %a35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 35), align 1
+ %a36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 36), align 1
+ %a37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 37), align 1
+ %a38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 38), align 1
+ %a39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 39), align 1
+ %a40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 40), align 1
+ %a41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 41), align 1
+ %a42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 42), align 1
+ %a43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 43), align 1
+ %a44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 44), align 1
+ %a45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 45), align 1
+ %a46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 46), align 1
+ %a47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 47), align 1
+ %a48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 48), align 1
+ %a49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 49), align 1
+ %a50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 50), align 1
+ %a51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 51), align 1
+ %a52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 52), align 1
+ %a53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 53), align 1
+ %a54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 54), align 1
+ %a55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 55), align 1
+ %a56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 56), align 1
+ %a57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 57), align 1
+ %a58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 58), align 1
+ %a59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 59), align 1
+ %a60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 60), align 1
+ %a61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 61), align 1
+ %a62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 62), align 1
+ %a63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @a8, i32 0, i64 63), align 1
+ %b0 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 0 ), align 1
+ %b1 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 1 ), align 1
+ %b2 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 2 ), align 1
+ %b3 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 3 ), align 1
+ %b4 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 4 ), align 1
+ %b5 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 5 ), align 1
+ %b6 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 6 ), align 1
+ %b7 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 7 ), align 1
+ %b8 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 8 ), align 1
+ %b9 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 9 ), align 1
+ %b10 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 10), align 1
+ %b11 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 11), align 1
+ %b12 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 12), align 1
+ %b13 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 13), align 1
+ %b14 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 14), align 1
+ %b15 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 15), align 1
+ %b16 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 16), align 1
+ %b17 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 17), align 1
+ %b18 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 18), align 1
+ %b19 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 19), align 1
+ %b20 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 20), align 1
+ %b21 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 21), align 1
+ %b22 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 22), align 1
+ %b23 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 23), align 1
+ %b24 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 24), align 1
+ %b25 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 25), align 1
+ %b26 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 26), align 1
+ %b27 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 27), align 1
+ %b28 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 28), align 1
+ %b29 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 29), align 1
+ %b30 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 30), align 1
+ %b31 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 31), align 1
+ %b32 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 32), align 1
+ %b33 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 33), align 1
+ %b34 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 34), align 1
+ %b35 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 35), align 1
+ %b36 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64
36), align 1 + %b37 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 37), align 1 + %b38 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 38), align 1 + %b39 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 39), align 1 + %b40 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 40), align 1 + %b41 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 41), align 1 + %b42 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 42), align 1 + %b43 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 43), align 1 + %b44 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 44), align 1 + %b45 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 45), align 1 + %b46 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 46), align 1 + %b47 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 47), align 1 + %b48 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 48), align 1 + %b49 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 49), align 1 + %b50 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 50), align 1 + %b51 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 51), align 1 + %b52 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 52), align 1 + %b53 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 53), align 1 + %b54 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 54), align 1 + %b55 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 55), align 1 + %b56 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 56), align 1 + %b57 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 57), align 1 + %b58 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 58), align 1 + %b59 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 59), align 1 + %b60 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 60), align 1 + %b61 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 61), align 1 + %b62 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 62), align 1 + %b63 = load i8, ptr getelementptr inbounds ([64 x i8], ptr @b8, i32 0, i64 63), align 1 + %r0 = call i8 @llvm.ucmp.i8.i8(i8 %a0 , i8 %b0 ) + %r1 = call i8 @llvm.ucmp.i8.i8(i8 %a1 , i8 %b1 ) + %r2 = call i8 @llvm.ucmp.i8.i8(i8 %a2 , i8 %b2 ) + %r3 = call i8 @llvm.ucmp.i8.i8(i8 %a3 , i8 %b3 ) + %r4 = call i8 @llvm.ucmp.i8.i8(i8 %a4 , i8 %b4 ) + %r5 = call i8 @llvm.ucmp.i8.i8(i8 %a5 , i8 %b5 ) + %r6 = call i8 @llvm.ucmp.i8.i8(i8 %a6 , i8 %b6 ) + %r7 = call i8 @llvm.ucmp.i8.i8(i8 %a7 , i8 %b7 ) + %r8 = call i8 @llvm.ucmp.i8.i8(i8 %a8 , i8 %b8 ) + %r9 = call i8 @llvm.ucmp.i8.i8(i8 %a9 , i8 %b9 ) + %r10 = call i8 @llvm.ucmp.i8.i8(i8 %a10, i8 %b10) + %r11 = call i8 @llvm.ucmp.i8.i8(i8 %a11, i8 %b11) + %r12 = call i8 @llvm.ucmp.i8.i8(i8 %a12, i8 %b12) + %r13 = call i8 @llvm.ucmp.i8.i8(i8 %a13, i8 %b13) + %r14 = call i8 @llvm.ucmp.i8.i8(i8 %a14, i8 %b14) + %r15 = call i8 @llvm.ucmp.i8.i8(i8 %a15, i8 %b15) + %r16 = call i8 @llvm.ucmp.i8.i8(i8 %a16, i8 %b16) + %r17 = call i8 @llvm.ucmp.i8.i8(i8 %a17, i8 %b17) + %r18 = call i8 @llvm.ucmp.i8.i8(i8 %a18, i8 %b18) + %r19 = call i8 @llvm.ucmp.i8.i8(i8 %a19, i8 %b19) + %r20 = call i8 @llvm.ucmp.i8.i8(i8 %a20, i8 %b20) + %r21 = call i8 
@llvm.ucmp.i8.i8(i8 %a21, i8 %b21) + %r22 = call i8 @llvm.ucmp.i8.i8(i8 %a22, i8 %b22) + %r23 = call i8 @llvm.ucmp.i8.i8(i8 %a23, i8 %b23) + %r24 = call i8 @llvm.ucmp.i8.i8(i8 %a24, i8 %b24) + %r25 = call i8 @llvm.ucmp.i8.i8(i8 %a25, i8 %b25) + %r26 = call i8 @llvm.ucmp.i8.i8(i8 %a26, i8 %b26) + %r27 = call i8 @llvm.ucmp.i8.i8(i8 %a27, i8 %b27) + %r28 = call i8 @llvm.ucmp.i8.i8(i8 %a28, i8 %b28) + %r29 = call i8 @llvm.ucmp.i8.i8(i8 %a29, i8 %b29) + %r30 = call i8 @llvm.ucmp.i8.i8(i8 %a30, i8 %b30) + %r31 = call i8 @llvm.ucmp.i8.i8(i8 %a31, i8 %b31) + %r32 = call i8 @llvm.ucmp.i8.i8(i8 %a32, i8 %b32) + %r33 = call i8 @llvm.ucmp.i8.i8(i8 %a33, i8 %b33) + %r34 = call i8 @llvm.ucmp.i8.i8(i8 %a34, i8 %b34) + %r35 = call i8 @llvm.ucmp.i8.i8(i8 %a35, i8 %b35) + %r36 = call i8 @llvm.ucmp.i8.i8(i8 %a36, i8 %b36) + %r37 = call i8 @llvm.ucmp.i8.i8(i8 %a37, i8 %b37) + %r38 = call i8 @llvm.ucmp.i8.i8(i8 %a38, i8 %b38) + %r39 = call i8 @llvm.ucmp.i8.i8(i8 %a39, i8 %b39) + %r40 = call i8 @llvm.ucmp.i8.i8(i8 %a40, i8 %b40) + %r41 = call i8 @llvm.ucmp.i8.i8(i8 %a41, i8 %b41) + %r42 = call i8 @llvm.ucmp.i8.i8(i8 %a42, i8 %b42) + %r43 = call i8 @llvm.ucmp.i8.i8(i8 %a43, i8 %b43) + %r44 = call i8 @llvm.ucmp.i8.i8(i8 %a44, i8 %b44) + %r45 = call i8 @llvm.ucmp.i8.i8(i8 %a45, i8 %b45) + %r46 = call i8 @llvm.ucmp.i8.i8(i8 %a46, i8 %b46) + %r47 = call i8 @llvm.ucmp.i8.i8(i8 %a47, i8 %b47) + %r48 = call i8 @llvm.ucmp.i8.i8(i8 %a48, i8 %b48) + %r49 = call i8 @llvm.ucmp.i8.i8(i8 %a49, i8 %b49) + %r50 = call i8 @llvm.ucmp.i8.i8(i8 %a50, i8 %b50) + %r51 = call i8 @llvm.ucmp.i8.i8(i8 %a51, i8 %b51) + %r52 = call i8 @llvm.ucmp.i8.i8(i8 %a52, i8 %b52) + %r53 = call i8 @llvm.ucmp.i8.i8(i8 %a53, i8 %b53) + %r54 = call i8 @llvm.ucmp.i8.i8(i8 %a54, i8 %b54) + %r55 = call i8 @llvm.ucmp.i8.i8(i8 %a55, i8 %b55) + %r56 = call i8 @llvm.ucmp.i8.i8(i8 %a56, i8 %b56) + %r57 = call i8 @llvm.ucmp.i8.i8(i8 %a57, i8 %b57) + %r58 = call i8 @llvm.ucmp.i8.i8(i8 %a58, i8 %b58) + %r59 = call i8 @llvm.ucmp.i8.i8(i8 %a59, i8 %b59) + %r60 = call i8 @llvm.ucmp.i8.i8(i8 %a60, i8 %b60) + %r61 = call i8 @llvm.ucmp.i8.i8(i8 %a61, i8 %b61) + %r62 = call i8 @llvm.ucmp.i8.i8(i8 %a62, i8 %b62) + %r63 = call i8 @llvm.ucmp.i8.i8(i8 %a63, i8 %b63) + store i8 %r0 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 0 ), align 1 + store i8 %r1 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 1 ), align 1 + store i8 %r2 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 2 ), align 1 + store i8 %r3 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 3 ), align 1 + store i8 %r4 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 4 ), align 1 + store i8 %r5 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 5 ), align 1 + store i8 %r6 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 6 ), align 1 + store i8 %r7 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 7 ), align 1 + store i8 %r8 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 8 ), align 1 + store i8 %r9 , ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 9 ), align 1 + store i8 %r10, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 10), align 1 + store i8 %r11, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 11), align 1 + store i8 %r12, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 12), align 1 + store i8 %r13, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 13), align 1 + store i8 %r14, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 14), align 1 
+ store i8 %r15, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 15), align 1 + store i8 %r16, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 16), align 1 + store i8 %r17, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 17), align 1 + store i8 %r18, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 18), align 1 + store i8 %r19, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 19), align 1 + store i8 %r20, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 20), align 1 + store i8 %r21, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 21), align 1 + store i8 %r22, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 22), align 1 + store i8 %r23, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 23), align 1 + store i8 %r24, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 24), align 1 + store i8 %r25, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 25), align 1 + store i8 %r26, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 26), align 1 + store i8 %r27, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 27), align 1 + store i8 %r28, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 28), align 1 + store i8 %r29, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 29), align 1 + store i8 %r30, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 30), align 1 + store i8 %r31, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 31), align 1 + store i8 %r32, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 32), align 1 + store i8 %r33, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 33), align 1 + store i8 %r34, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 34), align 1 + store i8 %r35, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 35), align 1 + store i8 %r36, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 36), align 1 + store i8 %r37, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 37), align 1 + store i8 %r38, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 38), align 1 + store i8 %r39, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 39), align 1 + store i8 %r40, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 40), align 1 + store i8 %r41, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 41), align 1 + store i8 %r42, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 42), align 1 + store i8 %r43, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 43), align 1 + store i8 %r44, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 44), align 1 + store i8 %r45, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 45), align 1 + store i8 %r46, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 46), align 1 + store i8 %r47, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 47), align 1 + store i8 %r48, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 48), align 1 + store i8 %r49, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 49), align 1 + store i8 %r50, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 50), align 1 + store i8 %r51, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 51), align 1 + store i8 %r52, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 52), align 1 + store i8 %r53, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 53), align 1 + store i8 %r54, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 54), 
+ store i8 %r55, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 55), align 1
+ store i8 %r56, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 56), align 1
+ store i8 %r57, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 57), align 1
+ store i8 %r58, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 58), align 1
+ store i8 %r59, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 59), align 1
+ store i8 %r60, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 60), align 1
+ store i8 %r61, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 61), align 1
+ store i8 %r62, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 62), align 1
+ store i8 %r63, ptr getelementptr inbounds ([64 x i8], ptr @c8, i32 0, i64 63), align 1
+ ret void
+}
diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index eb4b847185f53b..fff29d24a05299 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -220,11 +220,12 @@ TEST(APIntTest, i256) {
 }
 
 TEST(APIntTest, i1) {
-  const APInt neg_two(1, static_cast<uint64_t>(-2), true);
+  const APInt neg_two(1, static_cast<uint64_t>(-2), true,
+                      /*implicitTrunc=*/true);
   const APInt neg_one(1, static_cast<uint64_t>(-1), true);
   const APInt zero(1, 0);
   const APInt one(1, 1);
-  const APInt two(1, 2);
+  const APInt two(1, 2, false, /*implicitTrunc=*/true);
 
   EXPECT_EQ(0, neg_two.getSExtValue());
   EXPECT_EQ(-1, neg_one.getSExtValue());
diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h
index 5eb96a86e472d6..d6062bed5c0c0f 100644
--- a/mlir/include/mlir-c/Dialect/LLVM.h
+++ b/mlir/include/mlir-c/Dialect/LLVM.h
@@ -234,10 +234,14 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIBasicTypeAttrGet(
     MlirContext ctx, unsigned int tag, MlirAttribute name, uint64_t sizeInBits,
     MlirLLVMTypeEncoding encoding);
 
+/// Creates a self-referencing LLVM DICompositeType attribute.
+MLIR_CAPI_EXPORTED MlirAttribute
+mlirLLVMDICompositeTypeAttrGetRecSelf(MlirAttribute recId);
+
 /// Creates a LLVM DICompositeType attribute.
 MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompositeTypeAttrGet(
-    MlirContext ctx, unsigned int tag, MlirAttribute recId, MlirAttribute name,
-    MlirAttribute file, uint32_t line, MlirAttribute scope,
+    MlirContext ctx, MlirAttribute recId, bool isRecSelf, unsigned int tag,
+    MlirAttribute name, MlirAttribute file, uint32_t line, MlirAttribute scope,
     MlirAttribute baseType, int64_t flags, uint64_t sizeInBits,
     uint64_t alignInBits, intptr_t nElements, MlirAttribute const *elements,
     MlirAttribute dataLocation, MlirAttribute rank, MlirAttribute allocated,
@@ -311,13 +315,17 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDILocalVariableAttrGet(
     MlirAttribute diFile, unsigned int line, unsigned int arg,
     unsigned int alignInBits, MlirAttribute diType, int64_t flags);
 
+/// Creates a self-referencing LLVM DISubprogramAttr attribute.
+MLIR_CAPI_EXPORTED MlirAttribute
+mlirLLVMDISubprogramAttrGetRecSelf(MlirAttribute recId);
+
 /// Creates a LLVM DISubprogramAttr attribute.
 MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDISubprogramAttrGet(
-    MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit,
-    MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName,
-    MlirAttribute file, unsigned int line, unsigned int scopeLine,
-    uint64_t subprogramFlags, MlirAttribute type, intptr_t nRetainedNodes,
-    MlirAttribute const *retainedNodes);
+    MlirContext ctx, MlirAttribute recId, bool isRecSelf, MlirAttribute id,
+    MlirAttribute compileUnit, MlirAttribute scope, MlirAttribute name,
+    MlirAttribute linkageName, MlirAttribute file, unsigned int line,
+    unsigned int scopeLine, uint64_t subprogramFlags, MlirAttribute type,
+    intptr_t nRetainedNodes, MlirAttribute const *retainedNodes);
 
 /// Gets the scope from this DISubprogramAttr.
 MLIR_CAPI_EXPORTED MlirAttribute
@@ -356,9 +364,9 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIModuleAttrGet(
 
 /// Creates a LLVM DIImportedEntityAttr attribute.
 MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIImportedEntityAttrGet(
-    MlirContext ctx, unsigned int tag, MlirAttribute entity, MlirAttribute file,
-    unsigned int line, MlirAttribute name, intptr_t nElements,
-    MlirAttribute const *elements);
+    MlirContext ctx, unsigned int tag, MlirAttribute scope,
+    MlirAttribute entity, MlirAttribute file, unsigned int line,
+    MlirAttribute name, intptr_t nElements, MlirAttribute const *elements);
 
 /// Gets the scope of this DIModuleAttr.
 MLIR_CAPI_EXPORTED MlirAttribute
diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td
index 3f1776f57e4c71..d847dda5ae9f9b 100644
--- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td
+++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEOps.td
@@ -592,7 +592,7 @@ def StoreTileSliceOp : ArmSME_Op<"store_tile_slice", [
   }];
 }
 
-def MoveVectorToTileSliceOp : ArmSME_Op<"move_vector_to_tile_slice", [
+def InsertTileSliceOp : ArmSME_Op<"insert_tile_slice", [
   ArmSMETileOpInterface, Pure, AllTypesMatch<["tile", "result"]>,
   TypesMatchWith<
@@ -603,25 +603,25 @@
       "::llvm::cast<mlir::VectorType>($_self).getElementType(),"
       "/*scalableDims=*/{true})">,
 ]> {
-  let summary = "Move 1-D scalable vector to slice of 2-D tile";
+  let summary = "Insert 1-D scalable vector into slice of 2-D tile";
   let description = [{
-    The vector to tile slice operation moves a 1-D scalable vector to a slice
-    of a 2-D scalable vector tile at the given index. The type of the 1-D
-    scalable vector to be moved must match the type of the tile slice. A tile
-    slice is a 1-D vector of horizontally or vertically contiguous elements
-    within a ZA tile. The updated tile is returned as the result.
+    Inserts a 1-D scalable vector into a slice of a 2-D scalable vector tile at
+    the given index. The type of the 1-D scalable vector to be inserted must
+    match the type of the tile slice. A tile slice is a 1-D vector of
+    horizontally or vertically contiguous elements within a ZA tile. The updated
+    tile is returned as the result.
 
     An optional tile slice layout attribute specifies whether the tile slice is
     horizontal (default) or vertical.
 
-    Example 1: Move a vector<[16]xi8> into tile horizontally (default) at given index.
+    Example 1: Insert `vector<[16]xi8>` into tile horizontally at the given index.
    ```mlir
-    %tile_update = arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[16]xi8> into vector<[16]x[16]xi8>
+    %tile_update = arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[16]xi8> into vector<[16]x[16]xi8>
    ```
 
-    Example 2: Move a vector<[2]xf64> into tile vertically at given index.
+    Example 2: Insert `vector<[2]xf64>` into tile vertically at the given index.
    ```mlir
-    %tile_update = arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index layout<vertical> : vector<[2]xf64> into vector<[2]x[2]xf64>
+    %tile_update = arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> into vector<[2]x[2]xf64>
    ```
   }];
   let arguments = (ins
@@ -636,35 +636,35 @@
   }];
   let assemblyFormat = [{
-    $vector `,` $tile `,` $tile_slice_index (`layout` `` $layout^)?
+    $vector `,` $tile `[` $tile_slice_index `]` (`layout` `` $layout^)?
       attr-dict `:` type($vector) `into` type($result)
   }];
 }
 
-def MoveTileSliceToVectorOp : ArmSME_Op<"move_tile_slice_to_vector", [
+def ExtractTileSliceOp : ArmSME_Op<"extract_tile_slice", [
   ArmSMETileOpInterface, Pure,
   TypesMatchWith<
     "type of 'result' matches type of 'tile' slice",
     "tile", "result",
    "VectorType(VectorType::Builder(::llvm::cast<mlir::VectorType>($_self)).dropDim(0))">,
 ]> {
-  let summary = "Move slice of a 2-D tile to a 1-D scalable vector";
+  let summary = "Extract 1-D scalable vector from slice of 2-D tile";
   let description = [{
-    The tile slice to vector operation extracts a 1-D scalable slice from a 2-D
-    scalable tile at the given index. A tile slice is a 1-D vector of
-    horizontally or vertically contiguous elements within a ZA tile.
+    Extracts a 1-D scalable slice from a 2-D scalable tile at the given index.
+    A tile slice is a 1-D vector of horizontally or vertically contiguous
+    elements within a ZA tile.
 
     An optional tile slice layout attribute specifies whether the tile slice is
     horizontal (default) or vertical.
 
     Example 1: Extract `vector<[16]xi8>` from tile horizontally at the given index.
    ```mlir
-    %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8>
+    %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8>
    ```
 
    Example 2: Extract `vector<[2]xf64>` from tile vertically at the given index.
    ```mlir
-    %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64>
+    %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64>
    ```
  }];
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index e57be7f760d380..49e54df3436ff3 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -271,7 +271,7 @@ def LLVM_DILanguageParameter : LLVM_DIParameter<
 >;
 
 def LLVM_DITagParameter : LLVM_DIParameter<
-  "tag", /*default=*/"", "Tag", /*errorCase=*/"llvm::dwarf::DW_TAG_invalid"
+  "tag", /*default=*/"0", "Tag", /*errorCase=*/"llvm::dwarf::DW_TAG_invalid"
 >;
 
 def LLVM_DIOperationEncodingParameter : LLVM_DIParameter<
@@ -375,14 +375,17 @@ def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type",
                                          [LLVM_DIRecursiveTypeAttrInterface],
                                          "DITypeAttr"> {
   let parameters = (ins
-    LLVM_DITagParameter:$tag,
+    // DIRecursiveTypeAttrInterface specific parameters.
+    OptionalParameter<"DistinctAttr">:$recId,
+    OptionalParameter<"bool">:$isRecSelf,
+    // DICompositeType specific parameters.
+    LLVM_DITagParameter:$tag,
     OptionalParameter<"StringAttr">:$name,
     OptionalParameter<"DIFileAttr">:$file,
     OptionalParameter<"uint32_t">:$line,
     OptionalParameter<"DIScopeAttr">:$scope,
     OptionalParameter<"DITypeAttr">:$baseType,
-    OptionalParameter<"DIFlags", "DIFlags::Zero">:$flags,
+    OptionalParameter<"DIFlags">:$flags,
     OptionalParameter<"uint64_t">:$sizeInBits,
     OptionalParameter<"uint64_t">:$alignInBits,
     OptionalArrayRefParameter<"DINodeAttr">:$elements,
@@ -391,14 +394,26 @@ def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type",
     OptionalParameter<"DIExpressionAttr">:$allocated,
     OptionalParameter<"DIExpressionAttr">:$associated
   );
+  let builders = [
+    AttrBuilder<(ins
+      "unsigned":$tag, "StringAttr":$name, "DIFileAttr":$file,
+      "uint32_t":$line, "DIScopeAttr":$scope, "DITypeAttr":$baseType,
+      "DIFlags":$flags, "uint64_t":$sizeInBits, "uint64_t":$alignInBits,
+      "ArrayRef<DINodeAttr>":$elements, "DIExpressionAttr":$dataLocation,
+      "DIExpressionAttr":$rank, "DIExpressionAttr":$allocated,
+      "DIExpressionAttr":$associated
+    ), [{
+      return $_get($_ctxt, /*recId=*/nullptr, /*isRecSelf=*/false,
+                   tag, name, file, line, scope, baseType, flags, sizeInBits,
+                   alignInBits, elements, dataLocation, rank, allocated,
+                   associated);
+    }]>
+  ];
   let assemblyFormat = "`<` struct(params) `>`";
 
   let extraClassDeclaration = [{
     /// Requirements of DIRecursiveTypeAttrInterface.
     /// @{
-    /// Get whether this attr describes a recursive self reference.
-    bool isRecSelf() { return getTag() == 0; }
-
     /// Get a copy of this type attr but with the recursive ID set to `recId`.
     DIRecursiveTypeAttrInterface withRecId(DistinctAttr recId);
 
@@ -554,14 +569,19 @@ def LLVM_DILocalVariableAttr : LLVM_Attr<"DILocalVariable", "di_local_variable",
 //===----------------------------------------------------------------------===//
 
 def LLVM_DISubprogramAttr : LLVM_Attr<"DISubprogram", "di_subprogram",
-                                      /*traits=*/[], "DIScopeAttr"> {
+                                      [LLVM_DIRecursiveTypeAttrInterface],
+                                      "DIScopeAttr"> {
   let parameters = (ins
+    // DIRecursiveTypeAttrInterface specific parameters.
+    OptionalParameter<"DistinctAttr">:$recId,
+    OptionalParameter<"bool">:$isRecSelf,
+    // DISubprogramAttr specific parameters.
    OptionalParameter<"DistinctAttr">:$id,
    OptionalParameter<"DICompileUnitAttr">:$compileUnit,
-    "DIScopeAttr":$scope,
+    OptionalParameter<"DIScopeAttr">:$scope,
    OptionalParameter<"StringAttr">:$name,
    OptionalParameter<"StringAttr">:$linkageName,
-    "DIFileAttr":$file,
+    OptionalParameter<"DIFileAttr">:$file,
    OptionalParameter<"unsigned">:$line,
    OptionalParameter<"unsigned">:$scopeLine,
    OptionalParameter<"DISubprogramFlags">:$subprogramFlags,
@@ -569,21 +589,31 @@ def LLVM_DISubprogramAttr : LLVM_Attr<"DISubprogram", "di_subprogram",
    OptionalArrayRefParameter<"DINodeAttr">:$retainedNodes
   );
   let builders = [
-    AttrBuilderWithInferredContext<(ins
+    AttrBuilder<(ins
       "DistinctAttr":$id, "DICompileUnitAttr":$compileUnit,
-      "DIScopeAttr":$scope, "StringRef":$name, "StringRef":$linkageName,
+      "DIScopeAttr":$scope, "StringAttr":$name, "StringAttr":$linkageName,
       "DIFileAttr":$file, "unsigned":$line, "unsigned":$scopeLine,
       "DISubprogramFlags":$subprogramFlags, "DISubroutineTypeAttr":$type,
       "ArrayRef<DINodeAttr>":$retainedNodes
    ), [{
-      MLIRContext *ctx = file.getContext();
-      return $_get(ctx, id, compileUnit, scope, StringAttr::get(ctx, name),
-                   StringAttr::get(ctx, linkageName), file, line,
-                   scopeLine, subprogramFlags, type, retainedNodes);
+      return $_get($_ctxt, /*recId=*/nullptr, /*isRecSelf=*/false, id, compileUnit,
+                   scope, name, linkageName, file, line, scopeLine,
+                   subprogramFlags, type, retainedNodes);
    }]>
  ];
-  let assemblyFormat = "`<` struct(params) `>`";
+  let extraClassDeclaration = [{
+    /// Requirements of DIRecursiveTypeAttrInterface.
+    /// @{
+
+    /// Get a copy of this type attr but with the recursive ID set to `recId`.
+    DIRecursiveTypeAttrInterface withRecId(DistinctAttr recId);
+
+    /// Build a rec-self instance using the provided `recId`.
+    static DIRecursiveTypeAttrInterface getRecSelf(DistinctAttr recId);
+
+    /// @}
+  }];
 }
 
 //===----------------------------------------------------------------------===//
@@ -627,13 +657,9 @@ def LLVM_DINamespaceAttr : LLVM_Attr<"DINamespace", "di_namespace",
 
 def LLVM_DIImportedEntityAttr : LLVM_Attr<"DIImportedEntity",
                                           "di_imported_entity", /*traits=*/[],
                                           "DINodeAttr"> {
-  /// TODO: DIImportedEntity has a 'scope' field which represents the scope where
-  /// this entity is imported. Currently, we are not adding a 'scope' field in
-  /// DIImportedEntityAttr to avoid cyclic dependency. As DIImportedEntityAttr
-  /// entries will be contained inside a scope entity (e.g. DISubprogramAttr),
-  /// the scope can easily be inferred.
   let parameters = (ins
     LLVM_DITagParameter:$tag,
+    "DIScopeAttr":$scope,
     "DINodeAttr":$entity,
     OptionalParameter<"DIFileAttr">:$file,
     OptionalParameter<"unsigned">:$line,
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td
index 7085f81e203a1e..e2180410a8f04e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMInterfaces.td
@@ -406,7 +406,7 @@ def LLVM_DIRecursiveTypeAttrInterface
   let methods = [
     InterfaceMethod<[{
       Get whether this attr describes a recursive self reference.
-    }], "bool", "isRecSelf", (ins)>,
+    }], "bool", "getIsRecSelf", (ins)>,
     InterfaceMethod<[{
       Get the recursive ID used for matching "rec-decl" with "rec-self". If
       this attr instance is not recursive, return a null attribute.
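Editorial note on the interface above: `getRecSelf` and `withRecId` compose into a two-phase construction protocol for self-referential debug-info attributes. The sketch below shows the intended usage shape; it is illustrative only and not part of the patch. `buildDefinitionUsing` is a hypothetical helper, and only `DistinctAttr::create`, `getRecSelf`, and `withRecId` come from the declarations shown here:

```c++
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
using namespace mlir;
using namespace mlir::LLVM;

DISubprogramAttr buildSelfReferential(MLIRContext *ctx) {
  // A distinct attribute acts as the recursion ID that ties the full
  // "rec-decl" definition to its nested "rec-self" references.
  DistinctAttr recId = DistinctAttr::create(UnitAttr::get(ctx));

  // Phase 1: materialize a placeholder that stands for the subprogram
  // inside its own definition.
  DIRecursiveTypeAttrInterface recSelf = DISubprogramAttr::getRecSelf(recId);

  // Phase 2: build the full attribute, nesting `recSelf` wherever the
  // subprogram refers to itself (hypothetical helper), then bind the ID
  // so printers and translators can match rec-decl with rec-self.
  DISubprogramAttr def = buildDefinitionUsing(ctx, recSelf); // hypothetical
  return cast<DISubprogramAttr>(def.withRecId(recId));
}
```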
diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp
index 13341f0c4de881..03b536d7aad98f 100644
--- a/mlir/lib/CAPI/Dialect/LLVM.cpp
+++ b/mlir/lib/CAPI/Dialect/LLVM.cpp
@@ -159,9 +159,14 @@ MlirAttribute mlirLLVMDIBasicTypeAttrGet(MlirContext ctx, unsigned int tag,
       unwrap(ctx), tag, cast<StringAttr>(unwrap(name)), sizeInBits, encoding));
 }
 
+MlirAttribute mlirLLVMDICompositeTypeAttrGetRecSelf(MlirAttribute recId) {
+  return wrap(
+      DICompositeTypeAttr::getRecSelf(cast<DistinctAttr>(unwrap(recId))));
+}
+
 MlirAttribute mlirLLVMDICompositeTypeAttrGet(
-    MlirContext ctx, unsigned int tag, MlirAttribute recId, MlirAttribute name,
-    MlirAttribute file, uint32_t line, MlirAttribute scope,
+    MlirContext ctx, MlirAttribute recId, bool isRecSelf, unsigned int tag,
+    MlirAttribute name, MlirAttribute file, uint32_t line, MlirAttribute scope,
     MlirAttribute baseType, int64_t flags, uint64_t sizeInBits,
     uint64_t alignInBits, intptr_t nElements, MlirAttribute const *elements,
     MlirAttribute dataLocation, MlirAttribute rank, MlirAttribute allocated,
@@ -170,7 +175,7 @@ MlirAttribute mlirLLVMDICompositeTypeAttrGet(
   elementsStorage.reserve(nElements);
 
   return wrap(DICompositeTypeAttr::get(
-      unwrap(ctx), tag, cast<DistinctAttr>(unwrap(recId)),
+      unwrap(ctx), cast<DistinctAttr>(unwrap(recId)), isRecSelf, tag,
       cast<StringAttr>(unwrap(name)), cast<DIFileAttr>(unwrap(file)), line,
       cast<DIScopeAttr>(unwrap(scope)), cast<DITypeAttr>(unwrap(baseType)),
       DIFlags(flags), sizeInBits, alignInBits,
@@ -289,16 +294,21 @@ MlirAttribute mlirLLVMDISubroutineTypeAttrGet(MlirContext ctx,
                         [](Attribute a) { return cast<DITypeAttr>(a); })));
 }
 
+MlirAttribute mlirLLVMDISubprogramAttrGetRecSelf(MlirAttribute recId) {
+  return wrap(DISubprogramAttr::getRecSelf(cast<DistinctAttr>(unwrap(recId))));
+}
+
 MlirAttribute mlirLLVMDISubprogramAttrGet(
-    MlirContext ctx, MlirAttribute id, MlirAttribute compileUnit,
-    MlirAttribute scope, MlirAttribute name, MlirAttribute linkageName,
-    MlirAttribute file, unsigned int line, unsigned int scopeLine,
-    uint64_t subprogramFlags, MlirAttribute type, intptr_t nRetainedNodes,
-    MlirAttribute const *retainedNodes) {
+    MlirContext ctx, MlirAttribute recId, bool isRecSelf, MlirAttribute id,
+    MlirAttribute compileUnit, MlirAttribute scope, MlirAttribute name,
+    MlirAttribute linkageName, MlirAttribute file, unsigned int line,
+    unsigned int scopeLine, uint64_t subprogramFlags, MlirAttribute type,
+    intptr_t nRetainedNodes, MlirAttribute const *retainedNodes) {
   SmallVector<Attribute> nodesStorage;
   nodesStorage.reserve(nRetainedNodes);
 
   return wrap(DISubprogramAttr::get(
-      unwrap(ctx), cast<DistinctAttr>(unwrap(id)),
+      unwrap(ctx), cast<DistinctAttr>(unwrap(recId)), isRecSelf,
+      cast<DistinctAttr>(unwrap(id)),
       cast<DICompileUnitAttr>(unwrap(compileUnit)),
       cast<DIScopeAttr>(unwrap(scope)), cast<StringAttr>(unwrap(name)),
       cast<StringAttr>(unwrap(linkageName)), cast<DIFileAttr>(unwrap(file)),
@@ -353,14 +363,15 @@ MlirAttribute mlirLLVMDIModuleAttrGetScope(MlirAttribute diModule) {
 }
 
 MlirAttribute mlirLLVMDIImportedEntityAttrGet(
-    MlirContext ctx, unsigned int tag, MlirAttribute entity, MlirAttribute file,
-    unsigned int line, MlirAttribute name, intptr_t nElements,
-    MlirAttribute const *elements) {
+    MlirContext ctx, unsigned int tag, MlirAttribute scope,
+    MlirAttribute entity, MlirAttribute file, unsigned int line,
+    MlirAttribute name, intptr_t nElements, MlirAttribute const *elements) {
   SmallVector<Attribute> elementsStorage;
   elementsStorage.reserve(nElements);
   return wrap(DIImportedEntityAttr::get(
-      unwrap(ctx), tag, cast<DINodeAttr>(unwrap(entity)),
-      cast<DIFileAttr>(unwrap(file)), line, cast<StringAttr>(unwrap(name)),
+      unwrap(ctx), tag, cast<DIScopeAttr>(unwrap(scope)),
+      cast<DINodeAttr>(unwrap(entity)), cast<DIFileAttr>(unwrap(file)), line,
+      cast<StringAttr>(unwrap(name)),
      llvm::map_to_vector(unwrapList(nElements, elements, elementsStorage),
                          [](Attribute a) { return cast<DINodeAttr>(a); })));
 }
diff --git a/mlir/lib/Conversion/ArithToArmSME/ArithToArmSME.cpp b/mlir/lib/Conversion/ArithToArmSME/ArithToArmSME.cpp
index b12aa92001ff29..5aa2a098b17621 100644
--- a/mlir/lib/Conversion/ArithToArmSME/ArithToArmSME.cpp
+++ b/mlir/lib/Conversion/ArithToArmSME/ArithToArmSME.cpp
@@ -64,7 +64,7 @@ struct ConstantOpToArmSMELowering : public OpRewritePattern<arith::ConstantOp> {
       return success();
     }
 
-    // Lower non-zero constants to a loop of 'arm_sme.move_vector_to_tile_slice'
+    // Lower non-zero constants to a loop of 'arm_sme.insert_tile_slice'
    // ops that broadcast the constant to each tile slice.
    auto loc = constantOp.getLoc();
@@ -79,9 +79,9 @@ struct ConstantOpToArmSMELowering : public OpRewritePattern<arith::ConstantOp> {
    auto initTile = rewriter.create<arm_sme::GetTileOp>(loc, tileType);
    auto makeLoopBody = [&](OpBuilder &b, Location loc, Value tileSliceIndex,
                            Value currentTile) {
-      // Create 'arm_sme.move_vector_to_tile_slice' to write vector to tile
+      // Create 'arm_sme.insert_tile_slice' to write vector to tile
      // slice.
-      auto nextTile = b.create<arm_sme::MoveVectorToTileSliceOp>(
+      auto nextTile = b.create<arm_sme::InsertTileSliceOp>(
          loc, tileType, constantOp1D, currentTile, tileSliceIndex);
      return nextTile.getResult();
    };
diff --git a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
index 1ad2ec6cee7f8c..f1fa411b82914a 100644
--- a/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
+++ b/mlir/lib/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.cpp
@@ -575,23 +575,23 @@ struct StoreTileSliceConversion
   }
 };
 
-/// Lower `arm_sme.move_vector_to_tile_slice` to SME intrinsics.
-struct MoveVectorToTileSliceConversion
-    : public ConvertArmSMEOpToLLVMPattern<arm_sme::MoveVectorToTileSliceOp> {
+/// Lower `arm_sme.insert_tile_slice` to SME intrinsics.
+struct InsertTileSliceConversion
+    : public ConvertArmSMEOpToLLVMPattern<arm_sme::InsertTileSliceOp> {
   using ConvertArmSMEOpToLLVMPattern::ConvertArmSMEOpToLLVMPattern;
 
   LogicalResult
-  matchAndRewrite(arm_sme::MoveVectorToTileSliceOp moveVectorToTileSliceOp,
-                  arm_sme::MoveVectorToTileSliceOp::Adaptor adaptor,
+  matchAndRewrite(arm_sme::InsertTileSliceOp insertTileSliceOp,
+                  arm_sme::InsertTileSliceOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    auto loc = moveVectorToTileSliceOp.getLoc();
-    auto tileType = moveVectorToTileSliceOp.getTileType();
+    auto loc = insertTileSliceOp.getLoc();
+    auto tileType = insertTileSliceOp.getTileType();
 
-    auto tileId = getTileIdOrError(moveVectorToTileSliceOp);
+    auto tileId = getTileIdOrError(insertTileSliceOp);
     if (!tileId)
       return failure();
 
-    auto tileSlice = moveVectorToTileSliceOp.getTileSliceIndex();
+    auto tileSlice = insertTileSliceOp.getTileSliceIndex();
 
     // Cast tile slice from index to i32 for intrinsic.
     auto tileSliceI32 = rewriter.create<arith::IndexCastUIOp>(
@@ -606,42 +606,40 @@
     auto allActiveMask = rewriter.create<vector::SplatOp>(loc, predTy, one);
 
     // Create 'arm_sme.intr.write.(horiz|vert)' to write vector to tile slice.
-    switch (moveVectorToTileSliceOp.getLayout()) {
+    switch (insertTileSliceOp.getLayout()) {
     case arm_sme::TileSliceLayout::Horizontal:
       rewriter.create<arm_sme::aarch64_sme_write_horiz>(
           loc, tileId, tileSliceI32, allActiveMask,
-          moveVectorToTileSliceOp.getVector());
+          insertTileSliceOp.getVector());
       break;
     case arm_sme::TileSliceLayout::Vertical:
       rewriter.create<arm_sme::aarch64_sme_write_vert>(
          loc, tileId, tileSliceI32, allActiveMask,
-          moveVectorToTileSliceOp.getVector());
+          insertTileSliceOp.getVector());
      break;
    }
 
-    // Intrinsic has no result, replace 'arm_sme.move_vector_to_tile_slice' with
+    // Intrinsic has no result, replace 'arm_sme.insert_tile_slice' with
    // the input tile to preserve dataflow.
-    rewriter.replaceOp(moveVectorToTileSliceOp,
-                       moveVectorToTileSliceOp.getTile());
+    rewriter.replaceOp(insertTileSliceOp, insertTileSliceOp.getTile());
 
    return success();
  }
};
 
-/// Lower `arm_sme.move_tile_slice_to_vector` to SME intrinsics.
-struct MoveTileSliceToVectorConversion
-    : public ConvertArmSMEOpToLLVMPattern<arm_sme::MoveTileSliceToVectorOp> {
+/// Lower `arm_sme.extract_tile_slice` to SME intrinsics.
+struct ExtractTileSliceConversion
+    : public ConvertArmSMEOpToLLVMPattern<arm_sme::ExtractTileSliceOp> {
  using ConvertArmSMEOpToLLVMPattern::ConvertArmSMEOpToLLVMPattern;
 
  LogicalResult
-  matchAndRewrite(arm_sme::MoveTileSliceToVectorOp moveTileSliceToVector,
-                  OpAdaptor,
+  matchAndRewrite(arm_sme::ExtractTileSliceOp extractTileSlice, OpAdaptor,
                  ConversionPatternRewriter &rewriter) const override {
-    auto loc = moveTileSliceToVector.getLoc();
-    auto sliceType = moveTileSliceToVector.getSliceType();
-    auto sliceIndex = moveTileSliceToVector.getTileSliceIndex();
+    auto loc = extractTileSlice.getLoc();
+    auto sliceType = extractTileSlice.getSliceType();
+    auto sliceIndex = extractTileSlice.getTileSliceIndex();
 
-    auto tileId = getTileIdOrError(moveTileSliceToVector);
+    auto tileId = getTileIdOrError(extractTileSlice);
    if (!tileId)
      return failure();
 
@@ -659,16 +657,16 @@
        loc, rewriter.getI32Type(), sliceIndex);
 
    // Create 'arm_sme.intr.read.(horiz|vert)' to extract the tile slice.
-    switch (moveTileSliceToVector.getLayout()) {
+    switch (extractTileSlice.getLayout()) {
    case arm_sme::TileSliceLayout::Horizontal:
      rewriter.replaceOpWithNewOp<arm_sme::aarch64_sme_read_horiz>(
-          moveTileSliceToVector, sliceType, zeroVector, allTruePredicate,
-          tileId, sliceIndexI32);
+          extractTileSlice, sliceType, zeroVector, allTruePredicate, tileId,
+          sliceIndexI32);
      break;
    case arm_sme::TileSliceLayout::Vertical:
      rewriter.replaceOpWithNewOp<arm_sme::aarch64_sme_read_vert>(
-          moveTileSliceToVector, sliceType, zeroVector, allTruePredicate,
-          tileId, sliceIndexI32);
+          extractTileSlice, sliceType, zeroVector, allTruePredicate, tileId,
+          sliceIndexI32);
      break;
    }
 
@@ -985,8 +983,8 @@ void mlir::populateArmSMEToLLVMConversionPatterns(LLVMTypeConverter &converter,
  });
 
  addArmSMEConversionPatterns<
-      LoadTileSliceConversion, MoveTileSliceToVectorConversion,
-      MoveVectorToTileSliceConversion, StoreTileSliceConversion,
+      LoadTileSliceConversion, ExtractTileSliceConversion,
+      InsertTileSliceConversion, StoreTileSliceConversion,
      StreamingVLOpConversion, OuterProductOpConversion,
      OuterProductWideningOpConversion,
diff --git a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp
index 9f55932c33af66..411c9d2ebd8386 100644
--- a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp
+++ b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp
@@ -245,8 +245,8 @@ struct TileLoadOpConversion : public OpRewritePattern<arm_sme::TileLoadOp> {
///           : memref<?x?xi32>, vector<[4]xi1>,
///             vector<[4]xi32> into vector<[4]xi32>
///       // Insert slice into tile
-///       %tile_update = arm_sme.move_vector_to_tile_slice
-///         %slice, %iter_tile, %tile_slice_idx :
+///       %tile_update = arm_sme.insert_tile_slice
+///         %slice, %iter_tile[%tile_slice_idx] :
///         vector<[4]xi32> into vector<[4]x[4]xi32>
///       scf.yield %tile_update : vector<[4]x[4]xi32>
///     }
@@ -332,11 +332,11 @@ struct TileLoadOpWithMaskAndPadNonZeroConversion
        loc, tileSliceType, tileLoadOp.getBase(), memrefIndices, maskOp1D,
        /*passthru=*/pad1DOp);
 
-    // Create 'arm_sme.move_vector_to_tile_slice' to move slice into tile.
-    auto moveSlice = rewriter.create<arm_sme::MoveVectorToTileSliceOp>(
+    // Create 'arm_sme.insert_tile_slice' to insert slice into tile.
+    auto insertSlice = rewriter.create<arm_sme::InsertTileSliceOp>(
        loc, tileType, loadSlice->getResult(0), currentTile, tileSliceIndex,
        tileLoadOp.getLayout());
-    rewriter.create<scf::YieldOp>(loc, moveSlice.getResult());
+    rewriter.create<scf::YieldOp>(loc, insertSlice.getResult());
 
    rewriter.setInsertionPointAfter(forOp);
diff --git a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp
index ee52b9ef6a6f6b..55965d9c2a531d 100644
--- a/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp
+++ b/mlir/lib/Conversion/VectorToArmSME/VectorToArmSME.cpp
@@ -199,8 +199,8 @@ struct VectorStoreToArmSMELowering : public OpRewritePattern<vector::StoreOp> {
///   %broadcast_to_tile = scf.for %tile_slice_index = %c0 to %num_tile_slices
///       step %c1 iter_args(%iter_tile = %init_tile) -> (vector<[4]x[4]xi32>)
///   {
-///     %tile_update = arm_sme.move_vector_to_tile_slice
-///       %broadcast_to_1d, %iter_tile, %tile_slice_index :
+///     %tile_update = arm_sme.insert_tile_slice
+///       %broadcast_to_1d, %iter_tile[%tile_slice_index] :
///       vector<[4]xi32> into vector<[4]x[4]xi32>
///     scf.yield %tile_update : vector<[4]x[4]xi32>
///   }
@@ -238,9 +238,9 @@ struct BroadcastOpToArmSMELowering
    auto makeLoopBody = [&](OpBuilder &b, Location loc, Value tileSliceIndex,
                            Value currentTile) {
-      // Create 'arm_sme.move_vector_to_tile_slice' to broadcast the value
+      // Create 'arm_sme.insert_tile_slice' to broadcast the value
      // to each tile slice.
-      auto nextTile = b.create<arm_sme::MoveVectorToTileSliceOp>(
+      auto nextTile = b.create<arm_sme::InsertTileSliceOp>(
          loc, tileType, broadcastOp1D, currentTile, tileSliceIndex);
      return nextTile.getResult();
    };
@@ -267,8 +267,8 @@ struct BroadcastOpToArmSMELowering
///   %broadcast_to_tile = scf.for %tile_slice_index = %c0 to %num_tile_slices
///       step %c1 iter_args(%iter_tile = %init_tile) -> (vector<[4]x[4]xi32>)
///   {
-///     %tile_update = arm_sme.move_vector_to_tile_slice
-///       %broadcast_to_1d, %iter_tile, %tile_slice_index :
+///     %tile_update = arm_sme.insert_tile_slice
+///       %broadcast_to_1d, %iter_tile[%tile_slice_index] :
///       vector<[4]xi32> into vector<[4]x[4]xi32>
///     scf.yield %tile_update : vector<[4]x[4]xi32>
///   }
@@ -299,7 +299,7 @@ struct SplatOpToArmSMELowering : public OpRewritePattern<vector::SplatOp> {
    auto makeLoopBody = [&](OpBuilder &b, Location loc, Value tileSliceIndex,
                            Value currentTile) {
-      auto nextTile = b.create<arm_sme::MoveVectorToTileSliceOp>(
+      auto nextTile = b.create<arm_sme::InsertTileSliceOp>(
          loc, tileType, broadcastOp1D, currentTile, tileSliceIndex);
      return nextTile.getResult();
    };
@@ -497,7 +497,7 @@ struct VectorOuterProductToArmSMELowering
  }
};
 
-/// Lower `vector.extract` using `arm_sme.move_tile_slice_to_vector`.
+/// Lower `vector.extract` using `arm_sme.extract_tile_slice`.
///
/// Example:
/// ```
@@ -505,7 +505,7 @@ struct VectorOuterProductToArmSMELowering
/// ```
/// Becomes:
/// ```
-/// %slice = arm_sme.move_tile_slice_to_vector %tile[%row]
+/// %slice = arm_sme.extract_tile_slice %tile[%row]
///            : vector<[4]xi32> from vector<[4]x[4]xi32>
/// %el = vector.extract %slice[%col] : i32 from vector<[4]xi32>
/// ```
@@ -531,27 +531,26 @@ struct VectorExtractToArmSMELowering
    }
 
    Value sliceIndex = vector::getAsValues(rewriter, loc, position[0]).front();
-    auto moveTileSliceToVector =
-        rewriter.create<arm_sme::MoveTileSliceToVectorOp>(loc, sourceVector,
-                                                          sliceIndex);
+    auto extractTileSlice = rewriter.create<arm_sme::ExtractTileSliceOp>(
+        loc, sourceVector, sliceIndex);
 
    if (position.size() == 1) {
      // Single index case: Extracts a 1D slice.
-      rewriter.replaceOp(extractOp, moveTileSliceToVector);
+      rewriter.replaceOp(extractOp, extractTileSlice);
      return success();
    }
 
    // Two indices case: Extracts a single element.
    assert(position.size() == 2);
-    rewriter.replaceOpWithNewOp<vector::ExtractOp>(
-        extractOp, moveTileSliceToVector, position[1]);
+    rewriter.replaceOpWithNewOp<vector::ExtractOp>(extractOp, extractTileSlice,
+                                                   position[1]);
 
    return success();
  }
};
 
-/// Lower `vector.insert` using `arm_sme.move_vector_to_tile_slice` and
-/// `arm_sme.move_tile_slice_to_vector`.
+/// Lower `vector.insert` using `arm_sme.insert_tile_slice` and
+/// `arm_sme.extract_tile_slice`.
///
/// Example:
/// ```
@@ -560,10 +559,10 @@ struct VectorExtractToArmSMELowering
/// ```
/// Becomes:
/// ```
-/// %slice = arm_sme.move_tile_slice_to_vector %tile[%row]
+/// %slice = arm_sme.extract_tile_slice %tile[%row]
///            : vector<[4]xi32> from vector<[4]x[4]xi32>
/// %new_slice = vector.insert %el, %slice[%col] : i32 into vector<[4]xi32>
-/// %new_tile = arm_sme.move_vector_to_tile_slice %new_slice, %tile, %row
+/// %new_tile = arm_sme.insert_tile_slice %new_slice, %tile[%row]
///               : vector<[4]xi32> into vector<[4]x[4]xi32>
/// ```
 struct VectorInsertToArmSMELowering
@@ -594,21 +593,21 @@ struct VectorInsertToArmSMELowering
    if (position.size() == 2) {
      // Two indices case: Insert single element into tile.
      // We need to first extract the existing slice and update the element.
-      tileSlice = rewriter.create<arm_sme::MoveTileSliceToVectorOp>(
+      tileSlice = rewriter.create<arm_sme::ExtractTileSliceOp>(
          loc, insertOp.getDest(), sliceIndex);
      tileSlice = rewriter.create<vector::InsertOp>(loc, source, tileSlice,
                                                    position[1]);
    }
 
    // Insert the slice into the destination tile.
-    rewriter.replaceOpWithNewOp<arm_sme::MoveVectorToTileSliceOp>(
+    rewriter.replaceOpWithNewOp<arm_sme::InsertTileSliceOp>(
        insertOp, tileSlice, insertOp.getDest(), sliceIndex);
    return success();
  }
};
 
/// Lowers `vector.print` of a tile into a loop over the rows of the tile,
-/// extracting them via `arm_sme.move_tile_slice_to_vector`, then printing with
+/// extracting them via `arm_sme.extract_tile_slice`, then printing with
/// a 1D `vector.print`.
///
/// BEFORE:
@@ -623,7 +622,7 @@ struct VectorInsertToArmSMELowering
///   %vscale = vector.vscale
///   %svl_s = arith.muli %c4, %vscale : index
///   scf.for %i = %c0 to %svl_s step %c1 {
-///     %tile_slice = arm_sme.move_tile_slice_to_vector %tile[%i]
+///     %tile_slice = arm_sme.extract_tile_slice %tile[%i]
///       : vector<[4]xf32> from vector<[4]x[4]xf32>
///     vector.print %tile_slice : vector<[4]xf32>
///   }
@@ -655,7 +654,7 @@ struct VectorPrintToArmSMELowering : public OpRewritePattern<vector::PrintOp> {
      rewriter.setInsertionPointToStart(forOp.getBody());
      // Extract the current row from the tile.
      Value rowIndex = forOp.getInductionVar();
-      auto tileSlice = rewriter.create<arm_sme::MoveTileSliceToVectorOp>(
+      auto tileSlice = rewriter.create<arm_sme::ExtractTileSliceOp>(
          loc, printOp.getSource(), rowIndex);
      // Print the row with a 1D vector.print.
      rewriter.create<vector::PrintOp>(loc, tileSlice,
@@ -667,11 +666,11 @@ struct VectorPrintToArmSMELowering : public OpRewritePattern<vector::PrintOp> {
  }
};
 
-/// Folds a MoveTileSliceToVectorOp + TransferWriteOp to a StoreTileSliceOp.
+/// Folds a ExtractTileSliceOp + TransferWriteOp to a StoreTileSliceOp.
///
/// BEFORE:
/// ```mlir
-/// %slice = arm_sme.move_tile_slice_to_vector %tile[%index]
+/// %slice = arm_sme.extract_tile_slice %tile[%index]
///            : vector<[4]xf32> from vector<[4]x[4]xf32>
/// vector.transfer_write %slice, %memref[%i, %j], %mask {in_bounds = [true]}
///   : vector<[4]xf32>, memref<?x?xf32>
@@ -694,11 +693,11 @@ struct FoldTransferWriteOfExtractTileSlice
      return rewriter.notifyMatchFailure(writeOp,
                                         "not inbounds transfer write");
 
-    auto moveTileSlice =
-        writeOp.getVector().getDefiningOp<arm_sme::MoveTileSliceToVectorOp>();
-    if (!moveTileSlice)
+    auto extractTileSlice =
+        writeOp.getVector().getDefiningOp<arm_sme::ExtractTileSliceOp>();
+    if (!extractTileSlice)
      return rewriter.notifyMatchFailure(
-          writeOp, "vector to store not from MoveTileSliceToVectorOp");
+          writeOp, "vector to store not from ExtractTileSliceOp");
 
    AffineMap map = writeOp.getPermutationMap();
    if (!map.isMinorIdentity())
@@ -713,9 +712,9 @@ struct FoldTransferWriteOfExtractTileSlice
    }
 
    rewriter.replaceOpWithNewOp<arm_sme::StoreTileSliceOp>(
-        writeOp, moveTileSlice.getTile(), moveTileSlice.getTileSliceIndex(),
-        mask, writeOp.getSource(), writeOp.getIndices(),
-        moveTileSlice.getLayout());
+        writeOp, extractTileSlice.getTile(),
+        extractTileSlice.getTileSliceIndex(), mask, writeOp.getSource(),
+        writeOp.getIndices(), extractTileSlice.getLayout());
    return success();
  }
};
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp
index 98a9659735e7e6..491dcc7f01e73d 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp
@@ -203,16 +203,33 @@ void printExpressionArg(AsmPrinter &printer, uint64_t opcode,
 DIRecursiveTypeAttrInterface
 DICompositeTypeAttr::withRecId(DistinctAttr recId) {
   return DICompositeTypeAttr::get(
-      getContext(), getTag(), recId, getName(), getFile(), getLine(),
-      getScope(), getBaseType(), getFlags(), getSizeInBits(), getAlignInBits(),
-      getElements(), getDataLocation(), getRank(), getAllocated(),
-      getAssociated());
+      getContext(), recId, getIsRecSelf(), getTag(), getName(), getFile(),
+      getLine(), getScope(), getBaseType(), getFlags(), getSizeInBits(),
+      getAlignInBits(), getElements(), getDataLocation(), getRank(),
+      getAllocated(), getAssociated());
 }
getDataLocation(), getRank(), + getAllocated(), getAssociated()); } DIRecursiveTypeAttrInterface DICompositeTypeAttr::getRecSelf(DistinctAttr recId) { - return DICompositeTypeAttr::get(recId.getContext(), 0, recId, {}, {}, 0, {}, - {}, DIFlags(), 0, 0, {}, {}, {}, {}, {}); + return DICompositeTypeAttr::get(recId.getContext(), recId, /*isRecSelf=*/true, + 0, {}, {}, 0, {}, {}, DIFlags(), 0, 0, {}, {}, + {}, {}, {}); +} + +//===----------------------------------------------------------------------===// +// DISubprogramAttr +//===----------------------------------------------------------------------===// + +DIRecursiveTypeAttrInterface DISubprogramAttr::withRecId(DistinctAttr recId) { + return DISubprogramAttr::get( + getContext(), recId, getIsRecSelf(), getId(), getCompileUnit(), + getScope(), getName(), getLinkageName(), getFile(), getLine(), + getScopeLine(), getSubprogramFlags(), getType(), getRetainedNodes()); +} + +DIRecursiveTypeAttrInterface DISubprogramAttr::getRecSelf(DistinctAttr recId) { + return DISubprogramAttr::get(recId.getContext(), recId, /*isRecSelf=*/true, + {}, {}, {}, {}, {}, 0, 0, {}, {}, {}, {}); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 7e3026f1044a06..6ad688ffe85f3f 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -3155,9 +3155,9 @@ struct LLVMOpAsmDialectInterface : public OpAsmDialectInterface { .CasesetLoc(FusedLoc::get(context, {loc}, subprogramAttr)); } diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp index ce3643f513d34a..8c6f32f6bb0cd0 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp +++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp @@ -89,10 +89,9 @@ DICompositeTypeAttr DebugImporter::translateImpl(llvm::DICompositeType *node) { if (node->getTag() == llvm::dwarf::DW_TAG_array_type && !baseType) return nullptr; return DICompositeTypeAttr::get( - context, node->getTag(), /*recId=*/{}, - getStringAttrOrNull(node->getRawName()), translate(node->getFile()), - node->getLine(), translate(node->getScope()), baseType, - flags.value_or(DIFlags::Zero), node->getSizeInBits(), + context, node->getTag(), getStringAttrOrNull(node->getRawName()), + translate(node->getFile()), node->getLine(), translate(node->getScope()), + baseType, flags.value_or(DIFlags::Zero), node->getSizeInBits(), node->getAlignInBits(), elements, translateExpression(node->getDataLocationExp()), translateExpression(node->getRankExp()), @@ -217,8 +216,8 @@ DebugImporter::translateImpl(llvm::DIImportedEntity *node) { } return DIImportedEntityAttr::get( - context, node->getTag(), translate(node->getEntity()), - translate(node->getFile()), node->getLine(), + context, node->getTag(), translate(node->getScope()), + translate(node->getEntity()), translate(node->getFile()), node->getLine(), getStringAttrOrNull(node->getRawName()), elements); } @@ -227,6 +226,7 @@ DISubprogramAttr DebugImporter::translateImpl(llvm::DISubprogram *node) { mlir::DistinctAttr id; if (node->isDistinct()) id = getOrCreateDistinctID(node); + // Return nullptr if the scope or type is invalid. 
DIScopeAttr scope = translate(node->getScope()); if (node->getScope() && !scope) @@ -238,9 +238,12 @@ DISubprogramAttr DebugImporter::translateImpl(llvm::DISubprogram *node) { if (node->getType() && !type) return nullptr; + // Convert the retained nodes but drop all of them if one of them is invalid. SmallVector retainedNodes; for (llvm::DINode *retainedNode : node->getRetainedNodes()) retainedNodes.push_back(translate(retainedNode)); + if (llvm::is_contained(retainedNodes, nullptr)) + retainedNodes.clear(); return DISubprogramAttr::get(context, id, translate(node->getUnit()), scope, getStringAttrOrNull(node->getRawName()), @@ -374,6 +377,9 @@ getRecSelfConstructor(llvm::DINode *node) { .Case([&](llvm::DICompositeType *) { return CtorType(DICompositeTypeAttr::getRecSelf); }) + .Case([&](llvm::DISubprogram *) { + return CtorType(DISubprogramAttr::getRecSelf); + }) .Default(CtorType()); } diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 0b08a5303d62ec..59dfcb5e6616a5 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -96,6 +96,17 @@ llvm::MDString *DebugTranslation::getMDStringOrNull(StringAttr stringAttr) { return llvm::MDString::get(llvmCtx, stringAttr); } +llvm::MDTuple * +DebugTranslation::getMDTupleOrNull(ArrayRef elements) { + if (elements.empty()) + return nullptr; + SmallVector llvmElements = llvm::to_vector( + llvm::map_range(elements, [&](DINodeAttr attr) -> llvm::Metadata * { + return translate(attr); + })); + return llvm::MDNode::get(llvmCtx, llvmElements); +} + llvm::DIBasicType *DebugTranslation::translateImpl(DIBasicTypeAttr attr) { return llvm::DIBasicType::get( llvmCtx, attr.getTag(), getMDStringOrNull(attr.getName()), @@ -138,6 +149,17 @@ DebugTranslation::translateTemporaryImpl(DICompositeTypeAttr attr) { /*VTableHolder=*/nullptr); } +llvm::TempDISubprogram +DebugTranslation::translateTemporaryImpl(DISubprogramAttr attr) { + return llvm::DISubprogram::getTemporary( + llvmCtx, /*Scope=*/nullptr, /*Name=*/{}, /*LinkageName=*/{}, + /*File=*/nullptr, attr.getLine(), /*Type=*/nullptr, + /*ScopeLine=*/0, /*ContainingType=*/nullptr, /*VirtualIndex=*/0, + /*ThisAdjustment=*/0, llvm::DINode::FlagZero, + static_cast(attr.getSubprogramFlags()), + /*Unit=*/nullptr); +} + llvm::DICompositeType * DebugTranslation::translateImpl(DICompositeTypeAttr attr) { // TODO: Use distinct attributes to model this, once they have landed. 
@@ -151,10 +173,6 @@ DebugTranslation::translateImpl(DICompositeTypeAttr attr) {
     isDistinct = true;
   }
 
-  SmallVector<llvm::Metadata *> elements;
-  for (DINodeAttr member : attr.getElements())
-    elements.push_back(translate(member));
-
   return getDistinctOrUnique(
       isDistinct, llvmCtx, attr.getTag(), getMDStringOrNull(attr.getName()),
       translate(attr.getFile()), attr.getLine(), translate(attr.getScope()),
@@ -162,7 +180,7 @@
       attr.getAlignInBits(),
       /*OffsetInBits=*/0,
       /*Flags=*/static_cast<llvm::DINode::DIFlags>(attr.getFlags()),
-      llvm::MDNode::get(llvmCtx, elements),
+      getMDTupleOrNull(attr.getElements()),
       /*RuntimeLang=*/0, /*VTableHolder=*/nullptr,
       /*TemplateParams=*/nullptr, /*Identifier=*/nullptr,
       /*Discriminator=*/nullptr,
@@ -244,22 +262,21 @@ DebugTranslation::translateImpl(DIGlobalVariableAttr attr) {
       attr.getAlignInBits(), nullptr);
 }
 
-llvm::DIType *
+llvm::DINode *
 DebugTranslation::translateRecursive(DIRecursiveTypeAttrInterface attr) {
   DistinctAttr recursiveId = attr.getRecId();
-  if (auto *iter = recursiveTypeMap.find(recursiveId);
-      iter != recursiveTypeMap.end()) {
+  if (auto *iter = recursiveNodeMap.find(recursiveId);
+      iter != recursiveNodeMap.end()) {
     return iter->second;
-  } else {
-    assert(!attr.isRecSelf() && "unbound DI recursive self type");
   }
+  assert(!attr.getIsRecSelf() && "unbound DI recursive self reference");
 
-  auto setRecursivePlaceholder = [&](llvm::DIType *placeholder) {
-    recursiveTypeMap.try_emplace(recursiveId, placeholder);
+  auto setRecursivePlaceholder = [&](llvm::DINode *placeholder) {
+    recursiveNodeMap.try_emplace(recursiveId, placeholder);
   };
 
-  llvm::DIType *result =
-      TypeSwitch<DIRecursiveTypeAttrInterface, llvm::DIType *>(attr)
+  llvm::DINode *result =
+      TypeSwitch<DIRecursiveTypeAttrInterface, llvm::DINode *>(attr)
           .Case<DICompositeTypeAttr>([&](auto attr) {
             auto temporary = translateTemporaryImpl(attr);
             setRecursivePlaceholder(temporary.get());
@@ -268,11 +285,20 @@ DebugTranslation::translateRecursive(DIRecursiveTypeAttrInterface attr) {
             auto *concrete = translateImpl(attr);
             temporary->replaceAllUsesWith(concrete);
             return concrete;
+          })
+          .Case<DISubprogramAttr>([&](auto attr) {
+            auto temporary = translateTemporaryImpl(attr);
+            setRecursivePlaceholder(temporary.get());
+            // Must call `translateImpl` directly instead of `translate` to
+            // avoid handling the recursive interface again.
+            auto *concrete = translateImpl(attr);
+            temporary->replaceAllUsesWith(concrete);
+            return concrete;
           });
 
-  assert(recursiveTypeMap.back().first == recursiveId &&
+  assert(recursiveNodeMap.back().first == recursiveId &&
          "internal inconsistency: unexpected recursive translation stack");
-  recursiveTypeMap.pop_back();
+  recursiveNodeMap.pop_back();
 
   return result;
 }
@@ -299,6 +325,7 @@ llvm::DISubprogram *DebugTranslation::translateImpl(DISubprogramAttr attr) {
   bool isDefinition = static_cast<bool>(attr.getSubprogramFlags() &
                                         LLVM::DISubprogramFlags::Definition);
+
   llvm::DISubprogram *node = getDistinctOrUnique(
       isDefinition, llvmCtx, scope, getMDStringOrNull(attr.getName()),
       getMDStringOrNull(attr.getLinkageName()), file, attr.getLine(), type,
@@ -306,21 +333,8 @@ llvm::DISubprogram *DebugTranslation::translateImpl(DISubprogramAttr attr) {
       /*ContainingType=*/nullptr, /*VirtualIndex=*/0,
       /*ThisAdjustment=*/0, llvm::DINode::FlagZero,
       static_cast<llvm::DISubprogram::DISPFlags>(attr.getSubprogramFlags()),
-      compileUnit);
-
-  // DIImportedEntity requires scope information which DIImportedEntityAttr does
-  // not have. This is why we translate DIImportedEntityAttr after we have
-  // created DISubprogram as we can use it as the scope.
-  SmallVector<llvm::Metadata *> retainedNodes;
-  for (DINodeAttr nodeAttr : attr.getRetainedNodes()) {
-    if (auto importedAttr = dyn_cast<DIImportedEntityAttr>(nodeAttr)) {
-      llvm::DINode *dn = translate(importedAttr, node);
-      retainedNodes.push_back(dn);
-    }
-  }
-  if (!retainedNodes.empty())
-    node->replaceRetainedNodes(llvm::MDTuple::get(llvmCtx, retainedNodes));
-
+      compileUnit, /*TemplateParams=*/nullptr, /*Declaration=*/nullptr,
+      getMDTupleOrNull(attr.getRetainedNodes()));
   if (attr.getId())
     distinctAttrToNode.try_emplace(attr.getId(), node);
   return node;
@@ -341,16 +355,12 @@ llvm::DINamespace *DebugTranslation::translateImpl(DINamespaceAttr attr) {
       attr.getExportSymbols());
 }
 
-llvm::DIImportedEntity *DebugTranslation::translate(DIImportedEntityAttr attr,
-                                                    llvm::DIScope *scope) {
-  SmallVector<llvm::Metadata *> elements;
-  for (DINodeAttr member : attr.getElements())
-    elements.push_back(translate(member));
-
+llvm::DIImportedEntity *
+DebugTranslation::translateImpl(DIImportedEntityAttr attr) {
   return llvm::DIImportedEntity::get(
-      llvmCtx, attr.getTag(), scope, translate(attr.getEntity()),
-      translate(attr.getFile()), attr.getLine(),
-      getMDStringOrNull(attr.getName()), llvm::MDNode::get(llvmCtx, elements));
+      llvmCtx, attr.getTag(), translate(attr.getScope()),
+      translate(attr.getEntity()), translate(attr.getFile()), attr.getLine(),
+      getMDStringOrNull(attr.getName()), getMDTupleOrNull(attr.getElements()));
 }
 
 llvm::DISubrange *DebugTranslation::translateImpl(DISubrangeAttr attr) {
@@ -415,10 +425,10 @@ llvm::DINode *DebugTranslation::translate(DINodeAttr attr) {
   node =
       TypeSwitch<DINodeAttr, llvm::DINode *>(attr)
          .Case<DIBasicTypeAttr, DICompileUnitAttr, DICompositeTypeAttr,
                DIDerivedTypeAttr, DIFileAttr, DIGlobalVariableAttr,
-               DILabelAttr, DILexicalBlockAttr, DILexicalBlockFileAttr,
-               DILocalVariableAttr, DIModuleAttr, DINamespaceAttr,
-               DINullTypeAttr, DIStringTypeAttr, DISubprogramAttr,
-               DISubrangeAttr, DISubroutineTypeAttr>(
+               DIImportedEntityAttr, DILabelAttr, DILexicalBlockAttr,
+               DILexicalBlockFileAttr, DILocalVariableAttr, DIModuleAttr,
+               DINamespaceAttr, DINullTypeAttr, DIStringTypeAttr,
+               DISubprogramAttr, DISubrangeAttr, DISubroutineTypeAttr>(
               [&](auto attr) { return translateImpl(attr); });
 
   if (node && !node->isTemporary())
diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.h b/mlir/lib/Target/LLVMIR/DebugTranslation.h
index 37b985acf8541e..422aa34e28f3c9 100644
--- a/mlir/lib/Target/LLVMIR/DebugTranslation.h
+++ b/mlir/lib/Target/LLVMIR/DebugTranslation.h
@@ -75,6 +75,7 @@ class DebugTranslation {
   llvm::DIDerivedType *translateImpl(DIDerivedTypeAttr attr);
   llvm::DIStringType *translateImpl(DIStringTypeAttr attr);
   llvm::DIFile *translateImpl(DIFileAttr attr);
+  llvm::DIImportedEntity *translateImpl(DIImportedEntityAttr attr);
   llvm::DILabel *translateImpl(DILabelAttr attr);
   llvm::DILexicalBlock *translateImpl(DILexicalBlockAttr attr);
   llvm::DILexicalBlockFile *translateImpl(DILexicalBlockFileAttr attr);
@@ -90,27 +91,26 @@ class DebugTranslation {
   llvm::DISubroutineType *translateImpl(DISubroutineTypeAttr attr);
   llvm::DIType *translateImpl(DITypeAttr attr);
 
-  /// Currently, DIImportedEntityAttr does not have a scope field to avoid a
-  /// cyclic dependency. The scope information is obtained from the entity
-  /// which holds the list of DIImportedEntityAttr. This requires that scope
-  /// information be passed to translate function.
-  llvm::DIImportedEntity *translate(DIImportedEntityAttr attr, llvm::DIScope *);
-
   /// Attributes that support self recursion need to implement an additional
   /// method to hook into `translateRecursive`.
   /// - `translateTemporaryImpl()`:
   ///   Create a temporary translation of the DI attr without recursively
   ///   translating any nested DI attrs.
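The doc comment above names the hook; the mechanism behind it is the placeholder discipline visible in `translateRecursive`: bind a temporary node to the recursion ID before translating the body, let any nested self-reference resolve to that placeholder, then replace the placeholder's uses with the concrete node and pop the binding. A condensed sketch of that flow follows; `makeTemporary` and `translateBody` are illustrative stand-ins for the `translateTemporaryImpl`/`translateImpl` pair, not the actual MLIR API:

    // Sketch only: `makeTemporary` and `translateBody` are hypothetical
    // stand-ins for translateTemporaryImpl/translateImpl.
    llvm::MapVector<DistinctAttr, llvm::DINode *> recursiveNodeMap;
    llvm::TempDISubprogram makeTemporary(DistinctAttr recId);
    llvm::DINode *translateBody(DistinctAttr recId);

    llvm::DINode *translateWithPlaceholder(DistinctAttr recId) {
      // A nested self-reference finds the placeholder instead of recursing.
      if (llvm::DINode *placeholder = recursiveNodeMap.lookup(recId))
        return placeholder;
      llvm::TempDISubprogram temporary = makeTemporary(recId);
      recursiveNodeMap.try_emplace(recId, temporary.get());
      llvm::DINode *concrete = translateBody(recId); // may consult the map
      temporary->replaceAllUsesWith(concrete);       // patch self-references
      recursiveNodeMap.pop_back();                   // unwind this binding
      return concrete;
    }

The same two-phase shape serves both DICompositeTypeAttr and, with this patch, DISubprogramAttr, which is why the map is keyed by recursion ID rather than by attribute kind.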
-  llvm::DIType *translateRecursive(DIRecursiveTypeAttrInterface attr);
+  llvm::DINode *translateRecursive(DIRecursiveTypeAttrInterface attr);
 
   /// Translate the given attribute to a temporary llvm debug metadata of the
   /// corresponding type.
   llvm::TempDICompositeType translateTemporaryImpl(DICompositeTypeAttr attr);
+  llvm::TempDISubprogram translateTemporaryImpl(DISubprogramAttr attr);
 
   /// Constructs a string metadata node from the string attribute. Returns
   /// nullptr if `stringAttr` is null or contains an empty string.
   llvm::MDString *getMDStringOrNull(StringAttr stringAttr);
 
+  /// Constructs a tuple metadata node from the `elements`. Returns nullptr if
+  /// `elements` is empty.
+  llvm::MDTuple *getMDTupleOrNull(ArrayRef<DINodeAttr> elements);
+
   /// Constructs a DIExpression metadata node from the DIExpressionAttr. Returns
   /// nullptr if `DIExpressionAttr` is null.
   llvm::DIExpression *getExpressionAttrOrNull(DIExpressionAttr attr);
@@ -125,8 +125,8 @@ class DebugTranslation {
   /// metadata.
   DenseMap<Attribute, llvm::DINode *> attrToNode;
 
-  /// A mapping between recursive ID and the translated DIType.
-  llvm::MapVector<DistinctAttr, llvm::DIType *> recursiveTypeMap;
+  /// A mapping between recursive ID and the translated DINode.
+  llvm::MapVector<DistinctAttr, llvm::DINode *> recursiveNodeMap;
 
   /// A mapping between a distinct ID and the translated LLVM metadata node.
   /// This helps identify attrs that should translate into the same LLVM debug
diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c
index da28a96f89691d..36277122801de4 100644
--- a/mlir/test/CAPI/llvm.c
+++ b/mlir/test/CAPI/llvm.c
@@ -248,12 +248,16 @@ static void testDebugInfoAttributes(MlirContext ctx) {
       mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("foo"));
   MlirAttribute bar =
       mlirStringAttrGet(ctx, mlirStringRefCreateFromCString("bar"));
-  MlirAttribute id = mlirDisctinctAttrCreate(foo);
+
+  MlirAttribute none = mlirUnitAttrGet(ctx);
+  MlirAttribute id = mlirDisctinctAttrCreate(none);
+  MlirAttribute recId0 = mlirDisctinctAttrCreate(none);
+  MlirAttribute recId1 = mlirDisctinctAttrCreate(none);
 
   // CHECK: #llvm.di_null_type
   mlirAttributeDump(mlirLLVMDINullTypeAttrGet(ctx));
 
-  // CHECK: #llvm.di_basic_type
   MlirAttribute di_type =
       mlirLLVMDIBasicTypeAttrGet(ctx, 0, foo, 64, MlirLLVMTypeEncodingSigned);
@@ -312,15 +316,17 @@ static void testDebugInfoAttributes(MlirContext ctx) {
   // CHECK: #llvm.di_subroutine_type<{{.*}}>
   mlirAttributeDump(subroutine_type);
 
+  MlirAttribute di_subprogram_self_rec =
+      mlirLLVMDISubprogramAttrGetRecSelf(recId0);
   MlirAttribute di_imported_entity = mlirLLVMDIImportedEntityAttrGet(
-      ctx, 0, di_module, file, 1, foo, 1, &local_var);
+      ctx, 0, di_subprogram_self_rec, di_module, file, 1, foo, 1, &local_var);
 
   mlirAttributeDump(di_imported_entity);
   // CHECK: #llvm.di_imported_entity<{{.*}}>
 
   MlirAttribute di_subprogram = mlirLLVMDISubprogramAttrGet(
-      ctx, id, compile_unit, compile_unit, foo, bar, file, 1, 2, 0,
-      subroutine_type, 1, &di_imported_entity);
+      ctx, recId0, false, id, compile_unit, compile_unit, foo, bar, file, 1, 2,
+      0, subroutine_type, 1, &di_imported_entity);
   // CHECK: #llvm.di_subprogram<{{.*}}>
   mlirAttributeDump(di_subprogram);
@@ -350,10 +356,13 @@ static void testDebugInfoAttributes(MlirContext ctx) {
   // CHECK: #llvm.di_string_type<{{.*}}>
   mlirAttributeDump(string_type);
 
+  // CHECK: #llvm.di_composite_type
+  mlirAttributeDump(mlirLLVMDICompositeTypeAttrGetRecSelf(recId1));
+
   // CHECK: #llvm.di_composite_type<{{.*}}>
   mlirAttributeDump(mlirLLVMDICompositeTypeAttrGet(
-      ctx, 0, id, foo, file, 1, compile_unit, di_type, 0, 64, 8, 1, &di_type,
-      expression, expression, 
expression, expression)); + ctx, recId1, false, 0, foo, file, 1, compile_unit, di_type, 0, 64, 8, 1, + &di_type, expression, expression, expression, expression)); } int main(void) { diff --git a/mlir/test/Conversion/ArithToArmSME/arith-to-arm-sme.mlir b/mlir/test/Conversion/ArithToArmSME/arith-to-arm-sme.mlir index 49d2e2f3c182b9..0faac9c847f5ff 100644 --- a/mlir/test/Conversion/ArithToArmSME/arith-to-arm-sme.mlir +++ b/mlir/test/Conversion/ArithToArmSME/arith-to-arm-sme.mlir @@ -99,7 +99,7 @@ func.func @arith_constant_dense_2d_zero_f64() { // CHECK: %[[VSCALE:.*]] = vector.vscale // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C16]] : index // CHECK: %[[TILE:.*]] = scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[INIT_TILE]]) -> (vector<[16]x[16]xi8>) { -// CHECK: %[[TILE_UPDATE:.*]] = arm_sme.move_vector_to_tile_slice %[[C2_SPLAT]], %[[CURRENT_TILE]], %[[TILE_SLICE_INDEX]] : vector<[16]xi8> into vector<[16]x[16]xi8> +// CHECK: %[[TILE_UPDATE:.*]] = arm_sme.insert_tile_slice %[[C2_SPLAT]], %[[CURRENT_TILE]][%[[TILE_SLICE_INDEX]]] : vector<[16]xi8> into vector<[16]x[16]xi8> // CHECK: scf.yield %[[TILE_UPDATE]] : vector<[16]x[16]xi8> // CHECK: "prevent.dce"(%[[TILE]]) : (vector<[16]x[16]xi8>) -> () func.func @arith_constant_dense_2d_nonzero_i8() { @@ -119,7 +119,7 @@ func.func @arith_constant_dense_2d_nonzero_i8() { // CHECK: %[[VSCALE:.*]] = vector.vscale // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C2]] : index // CHECK: %[[TILE:.*]] = scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[INIT_TILE]]) -> (vector<[2]x[2]xf64>) { -// CHECK: %[[TILE_UPDATE:.*]] = arm_sme.move_vector_to_tile_slice %[[C2_SPLAT]], %[[CURRENT_TILE]], %[[TILE_SLICE_INDEX]] : vector<[2]xf64> into vector<[2]x[2]xf64> +// CHECK: %[[TILE_UPDATE:.*]] = arm_sme.insert_tile_slice %[[C2_SPLAT]], %[[CURRENT_TILE]][%[[TILE_SLICE_INDEX]]] : vector<[2]xf64> into vector<[2]x[2]xf64> // CHECK: scf.yield %[[TILE_UPDATE]] : vector<[2]x[2]xf64> // CHECK: "prevent.dce"(%[[TILE]]) : (vector<[2]x[2]xf64>) -> () func.func @arith_constant_dense_2d_nonzero_f64() { diff --git a/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir b/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir index ef85f3d069d743..6a4d77e86ab583 100644 --- a/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir +++ b/mlir/test/Conversion/ArmSMEToLLVM/arm-sme-to-llvm.mlir @@ -448,134 +448,134 @@ func.func @arm_sme_store_tile_slice_ver_f64(%tile_slice_index : index, %mask : v } //===----------------------------------------------------------------------===// -// arm_sme.move_vector_to_tile_slice +// arm_sme.insert_tile_slice //===----------------------------------------------------------------------===// // ----- -// CHECK-LABEL: @arm_sme_move_vector_to_tile_slice_hor_i32 +// CHECK-LABEL: @arm_sme_insert_tile_slice_hor_i32 // CHECK: "arm_sme.intr.write.horiz"({{.*}}) <{tile_id = 0 : i32}> : (i32, vector<[4]xi1>, vector<[4]xi32>) -> () -func.func @arm_sme_move_vector_to_tile_slice_hor_i32(%vector : vector<[4]xi32>, %tile_slice_index : index) -> () { +func.func @arm_sme_insert_tile_slice_hor_i32(%vector : vector<[4]xi32>, %tile_slice_index : index) -> () { %c0 = arith.constant 0 : index %tile = arm_sme.get_tile : vector<[4]x[4]xi32> - %tile_update = arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[4]xi32> into vector<[4]x[4]xi32> + %tile_update = arm_sme.insert_tile_slice 
%vector, %tile[%tile_slice_index] : vector<[4]xi32> into vector<[4]x[4]xi32>
   "test.some_use" (%tile_update) : (vector<[4]x[4]xi32>) -> ()
   return
 }
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_vector_to_tile_slice_ver_bf16
+// CHECK-LABEL: @arm_sme_insert_tile_slice_ver_bf16
 // CHECK: "arm_sme.intr.write.vert"({{.*}}) <{tile_id = 0 : i32}> : (i32, vector<[8]xi1>, vector<[8]xbf16>) -> ()
-func.func @arm_sme_move_vector_to_tile_slice_ver_bf16(%vector : vector<[8]xbf16>, %tile_slice_index : index) -> () {
+func.func @arm_sme_insert_tile_slice_ver_bf16(%vector : vector<[8]xbf16>, %tile_slice_index : index) -> () {
   %c0 = arith.constant 0 : index
   %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
-  %tile_update = arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index layout<vertical> : vector<[8]xbf16> into vector<[8]x[8]xbf16>
+  %tile_update = arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] layout<vertical> : vector<[8]xbf16> into vector<[8]x[8]xbf16>
   "test.some_use" (%tile_update) : (vector<[8]x[8]xbf16>) -> ()
   return
 }
 
 //===----------------------------------------------------------------------===//
-// arm_sme.move_tile_slice_to_vector
+// arm_sme.extract_tile_slice
 //===----------------------------------------------------------------------===//
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_i8
+// CHECK-LABEL: @arm_sme_extract_tile_slice_i8
 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[16]xi8>, vector<[16]xi1>, i32) -> vector<[16]xi8>
-func.func @arm_sme_move_tile_slice_to_vector_i8(%tile_slice_index : index) -> vector<[16]xi8> {
+func.func @arm_sme_extract_tile_slice_i8(%tile_slice_index : index) -> vector<[16]xi8> {
   %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
-  %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8>
+  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8>
   return %slice : vector<[16]xi8>
 }
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_i16
+// CHECK-LABEL: @arm_sme_extract_tile_slice_i16
 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[8]xi16>, vector<[8]xi1>, i32) -> vector<[8]xi16>
-func.func @arm_sme_move_tile_slice_to_vector_i16(%tile_slice_index : index) -> vector<[8]xi16> {
+func.func @arm_sme_extract_tile_slice_i16(%tile_slice_index : index) -> vector<[8]xi16> {
   %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
-  %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[8]xi16> from vector<[8]x[8]xi16>
+  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xi16> from vector<[8]x[8]xi16>
   return %slice : vector<[8]xi16>
 }
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_i32
+// CHECK-LABEL: @arm_sme_extract_tile_slice_i32
 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[4]xi32>, vector<[4]xi1>, i32) -> vector<[4]xi32>
-func.func @arm_sme_move_tile_slice_to_vector_i32(%tile_slice_index : index) -> vector<[4]xi32> {
+func.func @arm_sme_extract_tile_slice_i32(%tile_slice_index : index) -> vector<[4]xi32> {
   %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
-  %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[4]xi32> from vector<[4]x[4]xi32>
+  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xi32> from vector<[4]x[4]xi32>
   return %slice : vector<[4]xi32>
 }
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_i64
+// CHECK-LABEL: 
@arm_sme_extract_tile_slice_i64 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[2]xi64>, vector<[2]xi1>, i32) -> vector<[2]xi64> -func.func @arm_sme_move_tile_slice_to_vector_i64(%tile_slice_index : index) -> vector<[2]xi64> { +func.func @arm_sme_extract_tile_slice_i64(%tile_slice_index : index) -> vector<[2]xi64> { %tile = arm_sme.get_tile : vector<[2]x[2]xi64> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[2]xi64> from vector<[2]x[2]xi64> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xi64> from vector<[2]x[2]xi64> return %slice : vector<[2]xi64> } // ----- -// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_i128 +// CHECK-LABEL: @arm_sme_extract_tile_slice_i128 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[1]xi128>, vector<[1]xi1>, i32) -> vector<[1]xi128> -func.func @arm_sme_move_tile_slice_to_vector_i128(%tile_slice_index : index) -> vector<[1]xi128> { +func.func @arm_sme_extract_tile_slice_i128(%tile_slice_index : index) -> vector<[1]xi128> { %tile = arm_sme.get_tile : vector<[1]x[1]xi128> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[1]xi128> from vector<[1]x[1]xi128> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[1]xi128> from vector<[1]x[1]xi128> return %slice : vector<[1]xi128> } // ----- -// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_f16 +// CHECK-LABEL: @arm_sme_extract_tile_slice_f16 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[8]xf16>, vector<[8]xi1>, i32) -> vector<[8]xf16> -func.func @arm_sme_move_tile_slice_to_vector_f16(%tile_slice_index : index) -> vector<[8]xf16> { +func.func @arm_sme_extract_tile_slice_f16(%tile_slice_index : index) -> vector<[8]xf16> { %tile = arm_sme.get_tile : vector<[8]x[8]xf16> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[8]xf16> from vector<[8]x[8]xf16> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xf16> from vector<[8]x[8]xf16> return %slice : vector<[8]xf16> } // ----- -// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_bf16 +// CHECK-LABEL: @arm_sme_extract_tile_slice_bf16 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[8]xbf16>, vector<[8]xi1>, i32) -> vector<[8]xbf16> -func.func @arm_sme_move_tile_slice_to_vector_bf16(%tile_slice_index : index) -> vector<[8]xbf16> { +func.func @arm_sme_extract_tile_slice_bf16(%tile_slice_index : index) -> vector<[8]xbf16> { %tile = arm_sme.get_tile : vector<[8]x[8]xbf16> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[8]xbf16> from vector<[8]x[8]xbf16> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xbf16> from vector<[8]x[8]xbf16> return %slice : vector<[8]xbf16> } // ----- -// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_f32 +// CHECK-LABEL: @arm_sme_extract_tile_slice_f32 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[4]xf32>, vector<[4]xi1>, i32) -> vector<[4]xf32> -func.func @arm_sme_move_tile_slice_to_vector_f32(%tile_slice_index : index) -> vector<[4]xf32> { +func.func @arm_sme_extract_tile_slice_f32(%tile_slice_index : index) -> vector<[4]xf32> { %tile = arm_sme.get_tile : vector<[4]x[4]xf32> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xf32> from 
vector<[4]x[4]xf32>
   return %slice : vector<[4]xf32>
 }
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_f64
+// CHECK-LABEL: @arm_sme_extract_tile_slice_f64
 // CHECK: "arm_sme.intr.read.horiz"({{.*}}) <{tile_id = 0 : i32}> : (vector<[2]xf64>, vector<[2]xi1>, i32) -> vector<[2]xf64>
-func.func @arm_sme_move_tile_slice_to_vector_f64(%tile_slice_index : index) -> vector<[2]xf64> {
+func.func @arm_sme_extract_tile_slice_f64(%tile_slice_index : index) -> vector<[2]xf64> {
   %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
-  %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[2]xf64> from vector<[2]x[2]xf64>
+  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xf64> from vector<[2]x[2]xf64>
   return %slice : vector<[2]xf64>
 }
 
 // -----
 
-// CHECK-LABEL: @arm_sme_move_tile_slice_to_vector_ver_i128
+// CHECK-LABEL: @arm_sme_extract_tile_slice_ver_i128
 // CHECK: "arm_sme.intr.read.vert"({{.*}}) <{tile_id = 0 : i32}> : (vector<[1]xi128>, vector<[1]xi1>, i32) -> vector<[1]xi128>
-func.func @arm_sme_move_tile_slice_to_vector_ver_i128(%tile_slice_index : index) -> vector<[1]xi128> {
+func.func @arm_sme_extract_tile_slice_ver_i128(%tile_slice_index : index) -> vector<[1]xi128> {
   %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
-  %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] layout<vertical> : vector<[1]xi128> from vector<[1]x[1]xi128>
+  %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] layout<vertical> : vector<[1]xi128> from vector<[1]x[1]xi128>
   return %slice : vector<[1]xi128>
 }
diff --git a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir
index a2f2beff78c409..4ae710aa291137 100644
--- a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir
+++ b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir
@@ -89,7 +89,7 @@ func.func @arm_sme_tile_load_hor_with_mask_and_pad_zero(%src : memref<?x?xi32>)
 // CHECK-NEXT: %[[OFFSET:.*]] = arith.addi %[[C0]], %[[TILE_SLICE_INDEX]] : index
 // CHECK: %[[PAD_1D:.*]] = vector.splat %[[PAD]] : vector<[4]xi32>
 // CHECK: %[[LOAD_SLICE:.*]] = vector.maskedload %[[SRC]]{{\[}}%[[OFFSET]], %[[C0]]], %[[MASK_1D]], %[[PAD_1D]] : memref<?x?xi32>, vector<[4]xi1>, vector<[4]xi32> into vector<[4]xi32>
-// CHECK: %[[TILE_UPDATE:.*]] = arm_sme.move_vector_to_tile_slice %[[LOAD_SLICE]], %[[CURRENT_TILE]], %[[TILE_SLICE_INDEX]] : vector<[4]xi32> into vector<[4]x[4]xi32>
+// CHECK: %[[TILE_UPDATE:.*]] = arm_sme.insert_tile_slice %[[LOAD_SLICE]], %[[CURRENT_TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xi32> into vector<[4]x[4]xi32>
 // CHECK-NEXT: scf.yield %[[TILE_UPDATE]] : vector<[4]x[4]xi32>
 func.func @arm_sme_tile_load_hor_with_mask_and_nonzero_pad(%src : memref<?x?xi32>, %pad : i32) {
   %c0 = arith.constant 0 : index
diff --git a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
index 068fd0d04f1bc1..0f973af799634c 100644
--- a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
+++ b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
@@ -372,7 +372,7 @@ func.func @transfer_write_slice_with_mask(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>,
 // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical>
 func.func @transfer_write_vertical_slice(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %slice_index: index) {
   %c0 = arith.constant 0 : index
-  %slice = arm_sme.move_tile_slice_to_vector %vector[%slice_index] layout<vertical>
+  %slice = arm_sme.extract_tile_slice %vector[%slice_index] layout<vertical>
     : vector<[4]xf32> from vector<[4]x[4]xf32>
  vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
   return
@@ -394,7 +394,7 @@ func.func @transfer_write_vertical_slice(%vector: vector<[4]x[4]xf32>, %dest : m
 // CHECK: %[[VSCALE:.*]] = vector.vscale
 // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
 // CHECK: %[[TILE:.*]] = scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[INIT_TILE]]) -> (vector<[4]x[4]xi32>) {
-// CHECK: %[[NEW_TILE:.*]] = arm_sme.move_vector_to_tile_slice %[[SRC_1D]], %[[CURRENT_TILE]], %[[TILE_SLICE_INDEX]] : vector<[4]xi32> into vector<[4]x[4]xi32>
+// CHECK: %[[NEW_TILE:.*]] = arm_sme.insert_tile_slice %[[SRC_1D]], %[[CURRENT_TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xi32> into vector<[4]x[4]xi32>
 // CHECK: scf.yield %[[NEW_TILE]] : vector<[4]x[4]xi32>
 // CHECK: "prevent.dce"(%[[TILE]]) : (vector<[4]x[4]xi32>) -> ()
 func.func @broadcast_vec2d_from_i32(%arg0: i32) {
@@ -409,7 +409,7 @@
 // CHECK-SAME: %[[SRC:.*]]: vector<f32>) {
 // CHECK: %[[SRC_1D:.*]] = vector.broadcast %[[SRC]] : vector<f32> to vector<[4]xf32>
 // CHECK: scf.for
-// CHECK: arm_sme.move_vector_to_tile_slice %[[SRC_1D]], {{.*}}
+// CHECK: arm_sme.insert_tile_slice %[[SRC_1D]], {{.*}}
 func.func @broadcast_vec2d_from_vec0d(%arg0: vector<f32>) {
   %0 = vector.broadcast %arg0 : vector<f32> to vector<[4]x[4]xf32>
   "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
@@ -422,7 +422,7 @@
 // CHECK-SAME: %[[SRC:.*]]: vector<[8]xi16>) {
 // CHECK-NOT: vector.broadcast
 // CHECK: scf.for
-// CHECK: arm_sme.move_vector_to_tile_slice %[[SRC]], {{.*}}
+// CHECK: arm_sme.insert_tile_slice %[[SRC]], {{.*}}
 func.func @broadcast_vec2d_from_vec1d(%arg0: vector<[8]xi16>) {
   %0 = vector.broadcast %arg0 : vector<[8]xi16> to vector<[8]x[8]xi16>
   "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
@@ -442,7 +442,7 @@
 // CHECK: %[[VSCALE:.*]] = vector.vscale
 // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %{{.*}} : index
 // CHECK: scf.for {{.*}} to %[[NUM_TILE_SLICES]] {{.*}} {
-// CHECK: arm_sme.move_vector_to_tile_slice %[[BCST]], {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32>
+// CHECK: arm_sme.insert_tile_slice %[[BCST]], {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32>
 func.func @splat_vec2d_from_i32(%arg0: i32) {
   %0 = vector.splat %arg0 : vector<[4]x[4]xi32>
   "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
@@ -455,7 +455,7 @@
 // CHECK-SAME: %[[SRC:.*]]: f16) {
 // CHECK: %[[BCST:.*]] = vector.broadcast %[[SRC]] : f16 to vector<[8]xf16>
 // CHECK: scf.for
-// CHECK: arm_sme.move_vector_to_tile_slice %[[BCST]], {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16>
+// CHECK: arm_sme.insert_tile_slice %[[BCST]], {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16>
 func.func @splat_vec2d_from_f16(%arg0: f16) {
   %0 = vector.splat %arg0 : vector<[8]x[8]xf16>
   "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> ()
@@ -695,7 +695,7 @@ func.func @vector_print_tile(%tile: vector<[4]x[4]xf32>)
 // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale
 // CHECK-DAG: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
 // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] {
-// CHECK-NEXT: %[[TILE_SLICE:.*]] = arm_sme.move_tile_slice_to_vector %[[TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
+// 
CHECK-NEXT: %[[TILE_SLICE:.*]] = arm_sme.extract_tile_slice %[[TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xf32> from vector<[4]x[4]xf32> // CHECK-NEXT: vector.print %[[TILE_SLICE]] : vector<[4]xf32> //===----------------------------------------------------------------------===// @@ -925,7 +925,7 @@ func.func @vector_store_i128(%arg0 : memref) { // CHECK-SAME: %[[INDEX:.*]]: index) func.func @vector_insert_slice_i32(%slice: vector<[4]xi32>, %row: index) -> vector<[4]x[4]xi32>{ // CHECK-NEXT: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32> - // CHECK-NEXT: arm_sme.move_vector_to_tile_slice %[[SLICE]], %[[TILE]], %[[INDEX]] : vector<[4]xi32> into vector<[4]x[4]xi32> + // CHECK-NEXT: arm_sme.insert_tile_slice %[[SLICE]], %[[TILE]][%[[INDEX]]] : vector<[4]xi32> into vector<[4]x[4]xi32> %tile = arm_sme.get_tile : vector<[4]x[4]xi32> %new_tile = vector.insert %slice, %tile[%row] : vector<[4]xi32> into vector<[4]x[4]xi32> return %new_tile : vector<[4]x[4]xi32> @@ -935,7 +935,7 @@ func.func @vector_insert_slice_i32(%slice: vector<[4]xi32>, %row: index) -> vect // CHECK-LABEL: @vector_insert_slice_i8 func.func @vector_insert_slice_i8(%slice: vector<[16]xi8>, %row: index) -> vector<[16]x[16]xi8> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8> %tile = arm_sme.get_tile : vector<[16]x[16]xi8> %new_tile = vector.insert %slice, %tile[%row] : vector<[16]xi8> into vector<[16]x[16]xi8> return %new_tile : vector<[16]x[16]xi8> @@ -945,7 +945,7 @@ func.func @vector_insert_slice_i8(%slice: vector<[16]xi8>, %row: index) -> vecto // CHECK-LABEL: @vector_insert_slice_i16 func.func @vector_insert_slice_i16(%slice: vector<[8]xi16>, %row: index) -> vector<[8]x[8]xi16> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16> %tile = arm_sme.get_tile : vector<[8]x[8]xi16> %new_tile = vector.insert %slice, %tile[%row] : vector<[8]xi16> into vector<[8]x[8]xi16> return %new_tile : vector<[8]x[8]xi16> @@ -955,7 +955,7 @@ func.func @vector_insert_slice_i16(%slice: vector<[8]xi16>, %row: index) -> vect // CHECK-LABEL: @vector_insert_slice_i64 func.func @vector_insert_slice_i64(%slice: vector<[2]xi64>, %row: index) -> vector<[2]x[2]xi64> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64> %tile = arm_sme.get_tile : vector<[2]x[2]xi64> %new_tile = vector.insert %slice, %tile[%row] : vector<[2]xi64> into vector<[2]x[2]xi64> return %new_tile : vector<[2]x[2]xi64> @@ -965,7 +965,7 @@ func.func @vector_insert_slice_i64(%slice: vector<[2]xi64>, %row: index) -> vect // CHECK-LABEL: @vector_insert_slice_i128 func.func @vector_insert_slice_i128(%slice: vector<[1]xi128>, %row: index) -> vector<[1]x[1]xi128> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128> %tile = arm_sme.get_tile : vector<[1]x[1]xi128> %new_tile = vector.insert %slice, %tile[%row] : vector<[1]xi128> into vector<[1]x[1]xi128> return %new_tile : vector<[1]x[1]xi128> @@ -975,7 +975,7 @@ func.func @vector_insert_slice_i128(%slice: vector<[1]xi128>, %row: index) -> ve // CHECK-LABEL: @vector_insert_slice_f16 func.func 
@vector_insert_slice_f16(%slice: vector<[8]xf16>, %row: index) -> vector<[8]x[8]xf16> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16> %tile = arm_sme.get_tile : vector<[8]x[8]xf16> %new_tile = vector.insert %slice, %tile[%row] : vector<[8]xf16> into vector<[8]x[8]xf16> return %new_tile : vector<[8]x[8]xf16> @@ -985,7 +985,7 @@ func.func @vector_insert_slice_f16(%slice: vector<[8]xf16>, %row: index) -> vect // CHECK-LABEL: @vector_insert_slice_bf16 func.func @vector_insert_slice_bf16(%slice: vector<[8]xbf16>, %row: index) -> vector<[8]x[8]xbf16> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16> %tile = arm_sme.get_tile : vector<[8]x[8]xbf16> %new_tile = vector.insert %slice, %tile[%row] : vector<[8]xbf16> into vector<[8]x[8]xbf16> return %new_tile : vector<[8]x[8]xbf16> @@ -995,7 +995,7 @@ func.func @vector_insert_slice_bf16(%slice: vector<[8]xbf16>, %row: index) -> ve // CHECK-LABEL: @vector_insert_slice_f32 func.func @vector_insert_slice_f32(%slice: vector<[4]xf32>, %row: index) -> vector<[4]x[4]xf32> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32> %tile = arm_sme.get_tile : vector<[4]x[4]xf32> %new_tile = vector.insert %slice, %tile[%row] : vector<[4]xf32> into vector<[4]x[4]xf32> return %new_tile : vector<[4]x[4]xf32> @@ -1005,7 +1005,7 @@ func.func @vector_insert_slice_f32(%slice: vector<[4]xf32>, %row: index) -> vect // CHECK-LABEL: @vector_insert_slice_f64 func.func @vector_insert_slice_f64(%slice: vector<[2]xf64>, %row: index) -> vector<[2]x[2]xf64> { - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64> + // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64> %tile = arm_sme.get_tile : vector<[2]x[2]xf64> %new_tile = vector.insert %slice, %tile[%row] : vector<[2]xf64> into vector<[2]x[2]xf64> return %new_tile : vector<[2]x[2]xf64> @@ -1019,9 +1019,9 @@ func.func @vector_insert_slice_f64(%slice: vector<[2]xf64>, %row: index) -> vect // CHECK-SAME: %[[COL:.*]]: index) func.func @vector_insert_element_i32(%el: i32, %row: index, %col: index) -> vector<[4]x[4]xi32> { // CHECK-NEXT: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32> - // CHECK-NEXT: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %[[TILE]][%[[ROW]]] : vector<[4]xi32> from vector<[4]x[4]xi32> + // CHECK-NEXT: %[[SLICE:.*]] = arm_sme.extract_tile_slice %[[TILE]][%[[ROW]]] : vector<[4]xi32> from vector<[4]x[4]xi32> // CHECK-NEXT: %[[NEW_SLICE:.*]] = vector.insert %[[EL]], %[[SLICE]] [%[[COL]]] : i32 into vector<[4]xi32> - // CHECK-NEXT: arm_sme.move_vector_to_tile_slice %[[NEW_SLICE]], %[[TILE]], %[[ROW]] : vector<[4]xi32> into vector<[4]x[4]xi32> + // CHECK-NEXT: arm_sme.insert_tile_slice %[[NEW_SLICE]], %[[TILE]][%[[ROW]]] : vector<[4]xi32> into vector<[4]x[4]xi32> %tile = arm_sme.get_tile : vector<[4]x[4]xi32> %new_tile = vector.insert %el, %tile[%row, %col] : i32 into vector<[4]x[4]xi32> return %new_tile : vector<[4]x[4]xi32> @@ -1032,8 +1032,8 @@ func.func @vector_insert_element_i32(%el: i32, %row: index, %col: index) -> vect // CHECK-LABEL: @vector_insert_element_i8 func.func @vector_insert_element_i8(%el: i8, %row: index, 
%col: index) -> vector<[16]x[16]xi8> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[16]x[16]xi8> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[16]xi8> into vector<[16]x[16]xi8> %tile = arm_sme.get_tile : vector<[16]x[16]xi8> %new_tile = vector.insert %el, %tile[%row, %col] : i8 into vector<[16]x[16]xi8> return %new_tile : vector<[16]x[16]xi8> @@ -1044,8 +1044,8 @@ func.func @vector_insert_element_i8(%el: i8, %row: index, %col: index) -> vector // CHECK-LABEL: @vector_insert_element_i16 func.func @vector_insert_element_i16(%el: i16, %row: index, %col: index) -> vector<[8]x[8]xi16> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[8]x[8]xi16> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[8]xi16> into vector<[8]x[8]xi16> %tile = arm_sme.get_tile : vector<[8]x[8]xi16> %new_tile = vector.insert %el, %tile[%row, %col] : i16 into vector<[8]x[8]xi16> return %new_tile : vector<[8]x[8]xi16> @@ -1056,8 +1056,8 @@ func.func @vector_insert_element_i16(%el: i16, %row: index, %col: index) -> vect // CHECK-LABEL: @vector_insert_element_i64 func.func @vector_insert_element_i64(%el: i64, %row: index, %col: index) -> vector<[2]x[2]xi64> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[2]x[2]xi64> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[2]xi64> into vector<[2]x[2]xi64> %tile = arm_sme.get_tile : vector<[2]x[2]xi64> %new_tile = vector.insert %el, %tile[%row, %col] : i64 into vector<[2]x[2]xi64> return %new_tile : vector<[2]x[2]xi64> @@ -1068,8 +1068,8 @@ func.func @vector_insert_element_i64(%el: i64, %row: index, %col: index) -> vect // CHECK-LABEL: @vector_insert_element_i128 func.func @vector_insert_element_i128(%el: i128, %row: index, %col: index) -> vector<[1]x[1]xi128> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[1]x[1]xi128> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[1]xi128> into vector<[1]x[1]xi128> %tile = arm_sme.get_tile : vector<[1]x[1]xi128> %new_tile = vector.insert %el, %tile[%row, %col] : i128 into vector<[1]x[1]xi128> return %new_tile : vector<[1]x[1]xi128> @@ -1080,8 +1080,8 @@ func.func @vector_insert_element_i128(%el: i128, %row: index, %col: index) -> ve // CHECK-LABEL: 
@vector_insert_element_f16 func.func @vector_insert_element_f16(%el: f16, %row: index, %col: index) -> vector<[8]x[8]xf16> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[8]x[8]xf16> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[8]xf16> into vector<[8]x[8]xf16> %tile = arm_sme.get_tile : vector<[8]x[8]xf16> %new_tile = vector.insert %el, %tile[%row, %col] : f16 into vector<[8]x[8]xf16> return %new_tile : vector<[8]x[8]xf16> @@ -1092,8 +1092,8 @@ func.func @vector_insert_element_f16(%el: f16, %row: index, %col: index) -> vect // CHECK-LABEL: @vector_insert_element_bf16 func.func @vector_insert_element_bf16(%el: bf16, %row: index, %col: index) -> vector<[8]x[8]xbf16> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[8]x[8]xbf16> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[8]xbf16> into vector<[8]x[8]xbf16> %tile = arm_sme.get_tile : vector<[8]x[8]xbf16> %new_tile = vector.insert %el, %tile[%row, %col] : bf16 into vector<[8]x[8]xbf16> return %new_tile : vector<[8]x[8]xbf16> @@ -1104,8 +1104,8 @@ func.func @vector_insert_element_bf16(%el: bf16, %row: index, %col: index) -> ve // CHECK-LABEL: @vector_insert_element_f32 func.func @vector_insert_element_f32(%el: f32, %row: index, %col: index) -> vector<[4]x[4]xf32> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xf32> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[4]xf32> into vector<[4]x[4]xf32> %tile = arm_sme.get_tile : vector<[4]x[4]xf32> %new_tile = vector.insert %el, %tile[%row, %col] : f32 into vector<[4]x[4]xf32> return %new_tile : vector<[4]x[4]xf32> @@ -1116,15 +1116,15 @@ func.func @vector_insert_element_f32(%el: f32, %row: index, %col: index) -> vect // CHECK-LABEL: @vector_insert_element_f64 func.func @vector_insert_element_f64(%el: f64, %row: index, %col: index) -> vector<[2]x[2]xf64> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[2]x[2]xf64> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]]{{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> - // CHECK: arm_sme.move_vector_to_tile_slice %{{.*}}, %[[TILE]], %{{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64> + // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> + // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[2]xf64> into vector<[2]x[2]xf64> %tile = arm_sme.get_tile : vector<[2]x[2]xf64> %new_tile = vector.insert %el, %tile[%row, %col] : f64 into vector<[2]x[2]xf64> return %new_tile : vector<[2]x[2]xf64> } 
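The element-insert tests above all pin down the same read-modify-write lowering; spelled out once for an f32 tile, it looks as follows (an illustrative sketch assembled from the CHECK lines above, not a test in this patch):

    // Sketch: a single-element insert lowers to extract slice, update lane,
    // re-insert slice.
    func.func @sketch_insert_element(%el: f32, %row: index, %col: index) -> vector<[4]x[4]xf32> {
      %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
      %slice = arm_sme.extract_tile_slice %tile[%row] : vector<[4]xf32> from vector<[4]x[4]xf32>
      %new_slice = vector.insert %el, %slice [%col] : f32 into vector<[4]xf32>
      %new_tile = arm_sme.insert_tile_slice %new_slice, %tile[%row] : vector<[4]xf32> into vector<[4]x[4]xf32>
      return %new_tile : vector<[4]x[4]xf32>
    }

The element-extract section that follows exercises the first half of this pattern in isolation.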
//===----------------------------------------------------------------------===// -// vector.extract --> arm_sme.move_tile_slice_to_vector +// vector.extract --> arm_sme.extract_tile_slice //===----------------------------------------------------------------------===// // ----- @@ -1133,7 +1133,7 @@ func.func @vector_insert_element_f64(%el: f64, %row: index, %col: index) -> vect // CHECK-SAME: %[[INDEX:.*]]: index) func.func @vector_extract_slice_i32(%row: index) -> vector<[4]xi32> { // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32> - // CHECK: arm_sme.move_tile_slice_to_vector %[[TILE]][%[[INDEX]]] : vector<[4]xi32> from vector<[4]x[4]xi32> + // CHECK: arm_sme.extract_tile_slice %[[TILE]][%[[INDEX]]] : vector<[4]xi32> from vector<[4]x[4]xi32> %tile = arm_sme.get_tile : vector<[4]x[4]xi32> %slice = vector.extract %tile[%row] : vector<[4]xi32> from vector<[4]x[4]xi32> return %slice : vector<[4]xi32> @@ -1143,7 +1143,7 @@ func.func @vector_extract_slice_i32(%row: index) -> vector<[4]xi32> { // CHECK-LABEL: @vector_extract_slice_i8 func.func @vector_extract_slice_i8(%row: index) -> vector<[16]xi8> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> %tile = arm_sme.get_tile : vector<[16]x[16]xi8> %slice = vector.extract %tile[%row] : vector<[16]xi8> from vector<[16]x[16]xi8> return %slice : vector<[16]xi8> @@ -1153,7 +1153,7 @@ func.func @vector_extract_slice_i8(%row: index) -> vector<[16]xi8> { // CHECK-LABEL: @vector_extract_slice_i16 func.func @vector_extract_slice_i16(%row: index) -> vector<[8]xi16> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> %tile = arm_sme.get_tile : vector<[8]x[8]xi16> %slice = vector.extract %tile[%row] : vector<[8]xi16> from vector<[8]x[8]xi16> return %slice : vector<[8]xi16> @@ -1163,7 +1163,7 @@ func.func @vector_extract_slice_i16(%row: index) -> vector<[8]xi16> { // CHECK-LABEL: @vector_extract_slice_i64 func.func @vector_extract_slice_i64(%row: index) -> vector<[2]xi64> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> %tile = arm_sme.get_tile : vector<[2]x[2]xi64> %slice = vector.extract %tile[%row] : vector<[2]xi64> from vector<[2]x[2]xi64> return %slice : vector<[2]xi64> @@ -1173,7 +1173,7 @@ func.func @vector_extract_slice_i64(%row: index) -> vector<[2]xi64> { // CHECK-LABEL: @vector_extract_slice_i128 func.func @vector_extract_slice_i128(%row: index) -> vector<[1]xi128> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> %tile = arm_sme.get_tile : vector<[1]x[1]xi128> %slice = vector.extract %tile[%row] : vector<[1]xi128> from vector<[1]x[1]xi128> return %slice : vector<[1]xi128> @@ -1183,7 +1183,7 @@ func.func @vector_extract_slice_i128(%row: index) -> vector<[1]xi128> { // CHECK-LABEL: @vector_extract_slice_f16 func.func @vector_extract_slice_f16(%row: index) -> vector<[8]xf16> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> %tile = arm_sme.get_tile : 
vector<[8]x[8]xf16> %slice = vector.extract %tile[%row] : vector<[8]xf16> from vector<[8]x[8]xf16> return %slice : vector<[8]xf16> @@ -1193,7 +1193,7 @@ func.func @vector_extract_slice_f16(%row: index) -> vector<[8]xf16> { // CHECK-LABEL: @vector_extract_slice_bf16 func.func @vector_extract_slice_bf16(%row: index) -> vector<[8]xbf16> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> %tile = arm_sme.get_tile : vector<[8]x[8]xbf16> %slice = vector.extract %tile[%row] : vector<[8]xbf16> from vector<[8]x[8]xbf16> return %slice : vector<[8]xbf16> @@ -1203,7 +1203,7 @@ func.func @vector_extract_slice_bf16(%row: index) -> vector<[8]xbf16> { // CHECK-LABEL: @vector_extract_slice_f32 func.func @vector_extract_slice_f32(%row: index) -> vector<[4]xf32> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> %tile = arm_sme.get_tile : vector<[4]x[4]xf32> %slice = vector.extract %tile[%row] : vector<[4]xf32> from vector<[4]x[4]xf32> return %slice : vector<[4]xf32> @@ -1213,7 +1213,7 @@ func.func @vector_extract_slice_f32(%row: index) -> vector<[4]xf32> { // CHECK-LABEL: @vector_extract_slice_f64 func.func @vector_extract_slice_f64(%row: index) -> vector<[2]xf64> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> %tile = arm_sme.get_tile : vector<[2]x[2]xf64> %slice = vector.extract %tile[%row] : vector<[2]xf64> from vector<[2]x[2]xf64> return %slice : vector<[2]xf64> @@ -1226,7 +1226,7 @@ func.func @vector_extract_slice_f64(%row: index) -> vector<[2]xf64> { // CHECK-SAME: %[[COL:.*]]: index) func.func @vector_extract_element(%row: index, %col: index) -> i32 { // CHECK-NEXT: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32> - // CHECK-NEXT: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %[[TILE]][%[[ROW]]] : vector<[4]xi32> from vector<[4]x[4]xi32> + // CHECK-NEXT: %[[SLICE:.*]] = arm_sme.extract_tile_slice %[[TILE]][%[[ROW]]] : vector<[4]xi32> from vector<[4]x[4]xi32> // CHECK-NEXT: %[[EL:.*]] = vector.extract %[[SLICE]]{{\[}}%[[COL]]] : i32 from vector<[4]xi32> %tile = arm_sme.get_tile : vector<[4]x[4]xi32> %el = vector.extract %tile[%row, %col] : i32 from vector<[4]x[4]xi32> @@ -1237,7 +1237,7 @@ func.func @vector_extract_element(%row: index, %col: index) -> i32 { // CHECK-LABEL: @vector_extract_element_i8 func.func @vector_extract_element_i8(%row: index, %col: index) -> i8 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i8 from vector<[16]xi8> %tile = arm_sme.get_tile : vector<[16]x[16]xi8> %el = vector.extract %tile[%row, %col] : i8 from vector<[16]x[16]xi8> @@ -1248,7 +1248,7 @@ func.func @vector_extract_element_i8(%row: index, %col: index) -> i8 { // CHECK-LABEL: @vector_extract_element_i16 func.func @vector_extract_element_i16(%row: index, %col: index) -> i16 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[8]xi16> 
from vector<[8]x[8]xi16> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i16 from vector<[8]xi16> %tile = arm_sme.get_tile : vector<[8]x[8]xi16> %el = vector.extract %tile[%row, %col] : i16 from vector<[8]x[8]xi16> @@ -1259,7 +1259,7 @@ func.func @vector_extract_element_i16(%row: index, %col: index) -> i16 { // CHECK-LABEL: @vector_extract_element_i64 func.func @vector_extract_element_i64(%row: index, %col: index) -> i64 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i64 from vector<[2]xi64> %tile = arm_sme.get_tile : vector<[2]x[2]xi64> %el = vector.extract %tile[%row, %col] : i64 from vector<[2]x[2]xi64> @@ -1270,7 +1270,7 @@ func.func @vector_extract_element_i64(%row: index, %col: index) -> i64 { // CHECK-LABEL: @vector_extract_element_i128 func.func @vector_extract_element_i128(%row: index, %col: index) -> i128 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i128 from vector<[1]xi128> %tile = arm_sme.get_tile : vector<[1]x[1]xi128> %el = vector.extract %tile[%row, %col] : i128 from vector<[1]x[1]xi128> @@ -1281,7 +1281,7 @@ func.func @vector_extract_element_i128(%row: index, %col: index) -> i128 { // CHECK-LABEL: @vector_extract_element_f16 func.func @vector_extract_element_f16(%row: index, %col: index) -> f16 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : f16 from vector<[8]xf16> %tile = arm_sme.get_tile : vector<[8]x[8]xf16> %el = vector.extract %tile[%row, %col] : f16 from vector<[8]x[8]xf16> @@ -1292,7 +1292,7 @@ func.func @vector_extract_element_f16(%row: index, %col: index) -> f16 { // CHECK-LABEL: @vector_extract_element_bf16 func.func @vector_extract_element_bf16(%row: index, %col: index) -> bf16 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : bf16 from vector<[8]xbf16> %tile = arm_sme.get_tile : vector<[8]x[8]xbf16> %el = vector.extract %tile[%row, %col] : bf16 from vector<[8]x[8]xbf16> @@ -1303,7 +1303,7 @@ func.func @vector_extract_element_bf16(%row: index, %col: index) -> bf16 { // CHECK-LABEL: @vector_extract_element_f32 func.func @vector_extract_element_f32(%row: index, %col: index) -> f32 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : f32 from vector<[4]xf32> %tile = arm_sme.get_tile : vector<[4]x[4]xf32> %el = vector.extract %tile[%row, %col] : f32 from vector<[4]x[4]xf32> @@ -1314,7 +1314,7 @@ func.func @vector_extract_element_f32(%row: index, %col: index) -> f32 
{ // CHECK-LABEL: @vector_extract_element_f64 func.func @vector_extract_element_f64(%row: index, %col: index) -> f64 { - // CHECK: %[[SLICE:.*]] = arm_sme.move_tile_slice_to_vector %{{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> + // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : f64 from vector<[2]xf64> %tile = arm_sme.get_tile : vector<[2]x[2]xf64> %el = vector.extract %tile[%row, %col] : f64 from vector<[2]x[2]xf64> diff --git a/mlir/test/Dialect/ArmSME/invalid.mlir b/mlir/test/Dialect/ArmSME/invalid.mlir index cc052fac0d9dc9..700b2412ff7a7c 100644 --- a/mlir/test/Dialect/ArmSME/invalid.mlir +++ b/mlir/test/Dialect/ArmSME/invalid.mlir @@ -45,36 +45,36 @@ func.func @arm_sme_get_tile__bad_shape(%tile_id : i8) -> vector<[4]x[16]xi8> { } //===----------------------------------------------------------------------===// -// arm_sme.move_vector_to_tile_slice +// arm_sme.insert_tile_slice //===----------------------------------------------------------------------===// // ----- -func.func @arm_sme_move_vector_to_tile_slice_i8__bad_vector_type(%vector : vector<[8]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]x[16]xi8> { +func.func @arm_sme_insert_tile_slice_i8__bad_vector_type(%vector : vector<[8]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]x[16]xi8> { %c0 = arith.constant 0 : index // expected-error@+1 {{op failed to verify that type of 'vector' matches type of 'tile' slice}} - %0 = arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[8]xi8> into vector<[16]x[16]xi8> + %0 = arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xi8> into vector<[16]x[16]xi8> return %0 : vector<[16]x[16]xi8> } // ----- -func.func @arm_sme_move_vector_to_tile_slice_f32__bad_vector_type(%vector : vector<[8]xf32>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[4]x[4]xf32> { +func.func @arm_sme_insert_tile_slice_f32__bad_vector_type(%vector : vector<[8]xf32>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[4]x[4]xf32> { %c0 = arith.constant 0 : index // expected-error@+1 {{op failed to verify that type of 'vector' matches type of 'tile' slice}} - %0 = arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[8]xf32> into vector<[4]x[4]xf32> + %0 = arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xf32> into vector<[4]x[4]xf32> return %0 : vector<[4]x[4]xf32> } //===----------------------------------------------------------------------===// -// arm_sme.move_tile_slice_to_vector +// arm_sme.extract_tile_slice //===----------------------------------------------------------------------===// // ----- -func.func @arm_sme_move_tile_slice_to_vector__bad_result_type(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[2]xf64> { +func.func @arm_sme_extract_tile_slice__bad_result_type(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[2]xf64> { // expected-error@+1 {{op failed to verify that type of 'result' matches type of 'tile' slice}} - %0 = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[2]xf64> from vector<[4]x[4]xf32> + %0 = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xf64> from vector<[4]x[4]xf32> return %0 : vector<[2]xf64> } diff --git a/mlir/test/Dialect/ArmSME/roundtrip.mlir b/mlir/test/Dialect/ArmSME/roundtrip.mlir index 
6095fdc11ead8f..c326895aad698e 100644 --- a/mlir/test/Dialect/ArmSME/roundtrip.mlir +++ b/mlir/test/Dialect/ArmSME/roundtrip.mlir @@ -875,180 +875,180 @@ func.func @arm_sme_store_tile_slice_hor_i8(%tile : vector<[16]x[16]xi8>, %tile_s } //===----------------------------------------------------------------------===// -// arm_sme.move_vector_to_tile_slice +// arm_sme.insert_tile_slice //===----------------------------------------------------------------------===// // ----- -func.func @arm_sme_move_vector_to_tile_slice_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8> +func.func @arm_sme_insert_tile_slice_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[16]xi8> into vector<[16]x[16]xi8> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[16]xi8> into vector<[16]x[16]xi8> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_i16(%vector : vector<[8]xi16>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16> +func.func @arm_sme_insert_tile_slice_i16(%vector : vector<[8]xi16>, %tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[8]xi16> into vector<[8]x[8]xi16> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xi16> into vector<[8]x[8]xi16> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_i32(%vector : vector<[4]xi32>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32> +func.func @arm_sme_insert_tile_slice_i32(%vector : vector<[4]xi32>, %tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[4]xi32> into vector<[4]x[4]xi32> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[4]xi32> into vector<[4]x[4]xi32> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_i64(%vector : vector<[2]xi64>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64> +func.func @arm_sme_insert_tile_slice_i64(%vector : vector<[2]xi64>, %tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[2]xi64> into vector<[2]x[2]xi64> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[2]xi64> into vector<[2]x[2]xi64> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_i128(%vector : vector<[1]xi128>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> () { - // CHECK: 
arm_sme.move_vector_to_tile_slice {{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128> +func.func @arm_sme_insert_tile_slice_i128(%vector : vector<[1]xi128>, %tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[1]xi128> into vector<[1]x[1]xi128> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[1]xi128> into vector<[1]x[1]xi128> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_f16(%vector : vector<[8]xf16>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16> +func.func @arm_sme_insert_tile_slice_f16(%vector : vector<[8]xf16>, %tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[8]xf16> into vector<[8]x[8]xf16> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xf16> into vector<[8]x[8]xf16> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_bf16(%vector : vector<[8]xbf16>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16> +func.func @arm_sme_insert_tile_slice_bf16(%vector : vector<[8]xbf16>, %tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[8]xbf16> into vector<[8]x[8]xbf16> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[8]xbf16> into vector<[8]x[8]xbf16> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_f32(%vector : vector<[4]xf32>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32> +func.func @arm_sme_insert_tile_slice_f32(%vector : vector<[4]xf32>, %tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[4]xf32> into vector<[4]x[4]xf32> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[4]xf32> into vector<[4]x[4]xf32> return } // ----- -func.func @arm_sme_move_vector_to_tile_slice_f64(%vector : vector<[2]xf64>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64> +func.func @arm_sme_insert_tile_slice_f64(%vector : vector<[2]xf64>, %tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index : vector<[2]xf64> into vector<[2]x[2]xf64> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] : vector<[2]xf64> into vector<[2]x[2]xf64> return } // ----- -func.func 
@arm_sme_move_vector_to_tile_slice_ver_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () { - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8> +func.func @arm_sme_insert_tile_slice_ver_i8(%vector : vector<[16]xi8>, %tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> () { + // CHECK: arm_sme.insert_tile_slice {{.*}} layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8> %c0 = arith.constant 0 : index - arm_sme.move_vector_to_tile_slice %vector, %tile, %tile_slice_index layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8> + arm_sme.insert_tile_slice %vector, %tile[%tile_slice_index] layout<vertical> : vector<[16]xi8> into vector<[16]x[16]xi8> return } //===----------------------------------------------------------------------===// -// arm_sme.move_tile_slice_to_vector +// arm_sme.extract_tile_slice //===----------------------------------------------------------------------===// // ----- -func.func @arm_sme_move_tile_slice_to_vector_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]xi8> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8> +func.func @arm_sme_extract_tile_slice_i8(%tile : vector<[16]x[16]xi8>, %tile_slice_index : index) -> vector<[16]xi8> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[16]xi8> from vector<[16]x[16]xi8> return %slice : vector<[16]xi8> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> vector<[8]xi16> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[8]xi16> from vector<[8]x[8]xi16> +func.func @arm_sme_extract_tile_slice_i16(%tile : vector<[8]x[8]xi16>, %tile_slice_index : index) -> vector<[8]xi16> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xi16> from vector<[8]x[8]xi16> return %slice : vector<[8]xi16> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> vector<[4]xi32> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[4]xi32> from vector<[4]x[4]xi32> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[4]xi32> from vector<[4]x[4]xi32> +func.func @arm_sme_extract_tile_slice_i32(%tile : vector<[4]x[4]xi32>, %tile_slice_index : index) -> vector<[4]xi32> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xi32> from vector<[4]x[4]xi32> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xi32> from vector<[4]x[4]xi32> return %slice : vector<[4]xi32> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> vector<[2]xi64> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[2]xi64> from vector<[2]x[2]xi64> +func.func @arm_sme_extract_tile_slice_i64(%tile : vector<[2]x[2]xi64>, %tile_slice_index : index) -> vector<[2]xi64> { + // CHECK: 
arm_sme.extract_tile_slice {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xi64> from vector<[2]x[2]xi64> return %slice : vector<[2]xi64> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> vector<[1]xi128> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[1]xi128> from vector<[1]x[1]xi128> +func.func @arm_sme_extract_tile_slice_i128(%tile : vector<[1]x[1]xi128>, %tile_slice_index : index) -> vector<[1]xi128> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[1]xi128> from vector<[1]x[1]xi128> return %slice : vector<[1]xi128> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> vector<[8]xf16> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[8]xf16> from vector<[8]x[8]xf16> +func.func @arm_sme_extract_tile_slice_f16(%tile : vector<[8]x[8]xf16>, %tile_slice_index : index) -> vector<[8]xf16> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xf16> from vector<[8]x[8]xf16> return %slice : vector<[8]xf16> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> vector<[8]xbf16> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[8]xbf16> from vector<[8]x[8]xbf16> +func.func @arm_sme_extract_tile_slice_bf16(%tile : vector<[8]x[8]xbf16>, %tile_slice_index : index) -> vector<[8]xbf16> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[8]xbf16> from vector<[8]x[8]xbf16> return %slice : vector<[8]xbf16> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[4]xf32> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32> +func.func @arm_sme_extract_tile_slice_f32(%tile : vector<[4]x[4]xf32>, %tile_slice_index : index) -> vector<[4]xf32> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32> return %slice : vector<[4]xf32> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] : vector<[2]xf64> from vector<[2]x[2]xf64> +func.func @arm_sme_extract_tile_slice_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> { + // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xf64> 
from vector<[2]x[2]xf64> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] : vector<[2]xf64> from vector<[2]x[2]xf64> return %slice : vector<[2]xf64> } // ----- -func.func @arm_sme_move_tile_slice_to_vector_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> { - // CHECK: arm_sme.move_tile_slice_to_vector {{.*}} layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64> - %slice = arm_sme.move_tile_slice_to_vector %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64> +func.func @arm_sme_extract_tile_slice_ver_f64(%tile : vector<[2]x[2]xf64>, %tile_slice_index : index) -> vector<[2]xf64> { + // CHECK: arm_sme.extract_tile_slice {{.*}} layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64> + %slice = arm_sme.extract_tile_slice %tile[%tile_slice_index] layout<vertical> : vector<[2]xf64> from vector<[2]x[2]xf64> return %slice : vector<[2]xf64> } diff --git a/mlir/test/Dialect/ArmSME/tile-allocation-copies.mlir b/mlir/test/Dialect/ArmSME/tile-allocation-copies.mlir index 6d9cbf36a162ff..2327f74e7a5f93 100644 --- a/mlir/test/Dialect/ArmSME/tile-allocation-copies.mlir +++ b/mlir/test/Dialect/ArmSME/tile-allocation-copies.mlir @@ -59,7 +59,7 @@ func.func @cond_branch(%cond: i1, %tile: vector<[4]x[4]xf32>) { // CHECK: ^[[BB2_COPIES]]: // CHECK-NEXT: cf.br ^[[BB2:[[:alnum:]]+]] // CHECK: ^[[BB2]]: -// CHECK-NEXT: %[[NEXT_TILE:.*]] = arm_sme.move_vector_to_tile_slice %{{.*}}, %[[ITER_TILE]] +// CHECK-NEXT: %[[NEXT_TILE:.*]] = arm_sme.insert_tile_slice %{{.*}}, %[[ITER_TILE]] // CHECK: %[[BB1_COPY_1:.*]] = arm_sme.copy_tile %[[NEXT_TILE]] : vector<[4]x[4]xf32> // CHECK: cf.br ^bb1(%{{[[:alnum:]]+}}, %[[BB1_COPY_1]] // CHECK: ^[[BB3]](%{{.*}}: vector<[4]x[4]xf32>): @@ -80,7 +80,7 @@ func.func @cond_branch_with_backedge(%tileA: vector<[4]x[4]xf32>, %tileB: vector cf.cond_br %continueLoop, ^bb2, ^bb3(%iterTile, %tileB, %tileC, %tileD : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>) ^bb2: // Live here: %iterTile, %tileB, %tileC, %tileD - %nextTile = arm_sme.move_vector_to_tile_slice %slice, %iterTile, %currentIndex : vector<[4]xf32> into vector<[4]x[4]xf32> + %nextTile = arm_sme.insert_tile_slice %slice, %iterTile[%currentIndex] : vector<[4]xf32> into vector<[4]x[4]xf32> %nextIndex = arith.addi %currentIndex, %c1 : index cf.br ^bb1(%nextIndex, %nextTile : index, vector<[4]x[4]xf32>) ^bb3(%finalTileA: vector<[4]x[4]xf32>, %finalTileB: vector<[4]x[4]xf32>, %finalTileC: vector<[4]x[4]xf32>, %finalTileD: vector<[4]x[4]xf32>): diff --git a/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir b/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir index 2e1f3d1ee10a9b..0b739feaf019d7 100644 --- a/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir +++ b/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir @@ -9,8 +9,8 @@ // CHECK-LIVE-RANGE: ========== Coalesced Live Ranges: // CHECK-LIVE-RANGE: ^bb0: // CHECK-LIVE-RANGE: S arm_sme.zero -// CHECK-LIVE-RANGE-NEXT: |S arm_sme.move_vector_to_tile_slice -// CHECK-LIVE-RANGE-NEXT: || arm_sme.move_vector_to_tile_slice +// CHECK-LIVE-RANGE-NEXT: |S arm_sme.insert_tile_slice +// CHECK-LIVE-RANGE-NEXT: || arm_sme.insert_tile_slice // CHECK-LIVE-RANGE-NEXT: |E test.some_use // CHECK-LIVE-RANGE-NEXT: E test.some_use @@ -19,11 +19,11 @@ func.func @constant_with_multiple_users(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %index: index) { // CHECK-NEXT: %[[ZERO_TILE_0:.*]] = arm_sme.zero {tile_id = 0 : i32} : vector<[4]x[4]xf32> // CHECK-NEXT: %[[ZERO_TILE_1:.*]] = arm_sme.zero {tile_id = 1 : 
i32} : vector<[4]x[4]xf32> - // CHECK-NEXT: %[[INSERT_TILE_1:.*]] = arm_sme.move_vector_to_tile_slice %[[VECTOR_A]], %[[ZERO_TILE_1]], %{{.*}} {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> - // CHECK-NEXT: %[[INSERT_TILE_0:.*]] = arm_sme.move_vector_to_tile_slice %[[VECTOR_B]], %[[ZERO_TILE_0]], %{{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK-NEXT: %[[INSERT_TILE_1:.*]] = arm_sme.insert_tile_slice %[[VECTOR_A]], %[[ZERO_TILE_1]][%{{.*}}] {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK-NEXT: %[[INSERT_TILE_0:.*]] = arm_sme.insert_tile_slice %[[VECTOR_B]], %[[ZERO_TILE_0]][%{{.*}}] {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> %zero = arm_sme.zero : vector<[4]x[4]xf32> - %tile_a = arm_sme.move_vector_to_tile_slice %a, %zero, %index : vector<[4]xf32> into vector<[4]x[4]xf32> - %tile_b = arm_sme.move_vector_to_tile_slice %b, %zero, %index : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_a = arm_sme.insert_tile_slice %a, %zero[%index] : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_b = arm_sme.insert_tile_slice %b, %zero[%index] : vector<[4]xf32> into vector<[4]x[4]xf32> "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> () "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> () return @@ -34,16 +34,16 @@ func.func @constant_with_multiple_users(%a: vector<[4]xf32>, %b: vector<[4]xf32> // CHECK-LIVE-RANGE-LABEL: @value_with_multiple_users // CHECK-LIVE-RANGE: ========== Coalesced Live Ranges: // CHECK-LIVE-RANGE: ^bb0: -// CHECK-LIVE-RANGE-NEXT: |S arm_sme.move_vector_to_tile_slice -// CHECK-LIVE-RANGE-NEXT: || arm_sme.move_vector_to_tile_slice +// CHECK-LIVE-RANGE-NEXT: |S arm_sme.insert_tile_slice +// CHECK-LIVE-RANGE-NEXT: || arm_sme.insert_tile_slice // CHECK-LIVE-RANGE-NEXT: |E test.some_use // CHECK-LIVE-RANGE-NEXT: E test.some_use // expected-note@below {{tile operand is: <block argument> of type 'vector<[4]x[4]xf32>'}} func.func @value_with_multiple_users(%tile: vector<[4]x[4]xf32>, %a: vector<[4]xf32>, %b: vector<[4]xf32>, %index: index) { // expected-error@below {{op tile operand allocated to different SME virtial tile (move required)}} - %tile_a = arm_sme.move_vector_to_tile_slice %a, %tile, %index : vector<[4]xf32> into vector<[4]x[4]xf32> - %tile_b = arm_sme.move_vector_to_tile_slice %b, %tile, %index : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_a = arm_sme.insert_tile_slice %a, %tile[%index] : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_b = arm_sme.insert_tile_slice %b, %tile[%index] : vector<[4]xf32> into vector<[4]x[4]xf32> "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> () "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> () return @@ -286,14 +286,14 @@ func.func @run_out_of_tiles_but_avoid_spill(%a: vector<[4]xf32>, %b: vector<[4]x iter_args(%iter_a = %init, %iter_b = %init, %iter_c = %init, %iter_d = %init) -> (vector<[4]x[4]xf32>, vector<[4]x[4]xf32> , vector<[4]x[4]xf32> , vector<[4]x[4]xf32>) { // ^bb2: - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 2 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 3 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> - // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> - %new_a = arm_sme.move_vector_to_tile_slice %a, %iter_a, %i : vector<[4]xf32> into vector<[4]x[4]xf32> - %new_b = 
arm_sme.move_vector_to_tile_slice %b, %iter_b, %i : vector<[4]xf32> into vector<[4]x[4]xf32> - %new_c = arm_sme.move_vector_to_tile_slice %c, %iter_c, %i : vector<[4]xf32> into vector<[4]x[4]xf32> - %new_d = arm_sme.move_vector_to_tile_slice %d, %iter_d, %i : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 1 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 2 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 3 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> + // CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> + %new_a = arm_sme.insert_tile_slice %a, %iter_a[%i] : vector<[4]xf32> into vector<[4]x[4]xf32> + %new_b = arm_sme.insert_tile_slice %b, %iter_b[%i] : vector<[4]xf32> into vector<[4]x[4]xf32> + %new_c = arm_sme.insert_tile_slice %c, %iter_c[%i] : vector<[4]xf32> into vector<[4]x[4]xf32> + %new_d = arm_sme.insert_tile_slice %d, %iter_d[%i] : vector<[4]xf32> into vector<[4]x[4]xf32> scf.yield %new_a, %new_b, %new_c, %new_d : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32> } // Live = %init, %tile_a, %tile_b, %tile_c, %tile_d (out of tiles!) @@ -316,10 +316,10 @@ func.func @run_out_of_tiles_but_avoid_spill(%a: vector<[4]xf32>, %b: vector<[4]x // CHECK-LIVE-RANGE: ========== Coalesced Live Ranges: // CHECK-LIVE-RANGE: ^bb2: // CHECK-LIVE-RANGE-NEXT: || test.some_use -// CHECK-LIVE-RANGE-NEXT: ||S arm_sme.move_vector_to_tile_slice -// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.move_vector_to_tile_slice -// CHECK-LIVE-RANGE-NEXT: ||||S arm_sme.move_vector_to_tile_slice -// CHECK-LIVE-RANGE-NEXT: |||||S arm_sme.move_vector_to_tile_slice +// CHECK-LIVE-RANGE-NEXT: ||S arm_sme.insert_tile_slice +// CHECK-LIVE-RANGE-NEXT: |||S arm_sme.insert_tile_slice +// CHECK-LIVE-RANGE-NEXT: ||||S arm_sme.insert_tile_slice +// CHECK-LIVE-RANGE-NEXT: |||||S arm_sme.insert_tile_slice // CHECK-LIVE-RANGE-NEXT: ||E||| test.some_use // CHECK-LIVE-RANGE-NEXT: || E|| test.some_use // CHECK-LIVE-RANGE-NEXT: || E| test.some_use @@ -346,10 +346,10 @@ func.func @avoidable_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector< // So spilled here (unnecessarily). // The arm_sme.zero op could be moved into the loop to avoid this. 
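// (Note: there are only four 32-bit SME virtual tiles, ZA0.S-ZA3.S; keeping %zero live across the backedge on top of the four tile values defined below exceeds that budget, hence the spill.) 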
"test.some_use"(%zero) : (vector<[4]x[4]xf32>) -> () - %tile_a = arm_sme.move_vector_to_tile_slice %a, %tile, %c0 : vector<[4]xf32> into vector<[4]x[4]xf32> - %tile_b = arm_sme.move_vector_to_tile_slice %b, %tile, %c0 : vector<[4]xf32> into vector<[4]x[4]xf32> - %tile_c = arm_sme.move_vector_to_tile_slice %c, %tile, %c0 : vector<[4]xf32> into vector<[4]x[4]xf32> - %tile_d = arm_sme.move_vector_to_tile_slice %d, %tile, %c0 : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_a = arm_sme.insert_tile_slice %a, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_b = arm_sme.insert_tile_slice %b, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_c = arm_sme.insert_tile_slice %c, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32> + %tile_d = arm_sme.insert_tile_slice %d, %tile[%c0] : vector<[4]xf32> into vector<[4]x[4]xf32> // %zero is still live here (due the the backedge) "test.some_use"(%tile_a) : (vector<[4]x[4]xf32>) -> () "test.some_use"(%tile_b) : (vector<[4]x[4]xf32>) -> () @@ -405,7 +405,7 @@ func.func @avoidable_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector< // CHECK: arm_sme.get_tile {tile_id = 1 : i32} : vector<[4]x[4]xf32> // CHECK: arm_sme.get_tile {tile_id = 2 : i32} : vector<[4]x[4]xf32> // CHECK: arm_sme.get_tile {tile_id = 3 : i32} : vector<[4]x[4]xf32> -// CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> +// CHECK: arm_sme.insert_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> // CHECK-NOT: tile_id = 16 func.func @cond_branch_with_backedge(%slice: vector<[4]xf32>) { %tileA = arm_sme.get_tile : vector<[4]x[4]xf32> @@ -423,7 +423,7 @@ func.func @cond_branch_with_backedge(%slice: vector<[4]xf32>) { cf.cond_br %continueLoop, ^bb2, ^bb3(%iterTile, %tileB, %tileC, %tileD : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>) ^bb2: // Live here: %iterTile, %tileB, %tileC, %tileD - %nextTile = arm_sme.move_vector_to_tile_slice %slice, %iterTile, %currentIndex : vector<[4]xf32> into vector<[4]x[4]xf32> + %nextTile = arm_sme.insert_tile_slice %slice, %iterTile[%currentIndex] : vector<[4]xf32> into vector<[4]x[4]xf32> %nextIndex = arith.addi %currentIndex, %c1 : index cf.br ^bb1(%nextIndex, %nextTile : index, vector<[4]x[4]xf32>) ^bb3(%finalTileA: vector<[4]x[4]xf32>, %finalTileB: vector<[4]x[4]xf32>, %finalTileC: vector<[4]x[4]xf32>, %finalTileD: vector<[4]x[4]xf32>): diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll index 684e14dfbb7d5d..1ce4adef896f79 100644 --- a/mlir/test/Target/LLVMIR/Import/debug-info.ll +++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll @@ -309,17 +309,13 @@ define void @class_method() { ret void, !dbg !9 } -; Verify the cyclic composite type is identified, even though conversion begins from the subprogram type. -; CHECK-DAG: #[[COMP_SELF:.+]] = #llvm.di_composite_type -; CHECK-DAG: #[[COMP_PTR:.+]] = #llvm.di_derived_type +; Verify the cyclic subprogram is handled correctly. 
+; CHECK-DAG: #[[SP_SELF:.+]] = #llvm.di_subprogram +; CHECK-DAG: #[[COMP:.+]] = #llvm.di_composite_type +; CHECK-DAG: #[[COMP_PTR:.+]] = #llvm.di_derived_type ; CHECK-DAG: #[[SP_TYPE:.+]] = #llvm.di_subroutine_type -; CHECK-DAG: #[[SP_INNER:.+]] = #llvm.di_subprogram -; CHECK-DAG: #[[COMP:.+]] = #llvm.di_composite_type - -; CHECK-DAG: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type -; CHECK-DAG: #[[SP_TYPE_OUTER:.+]] = #llvm.di_subroutine_type -; CHECK-DAG: #[[SP_OUTER:.+]] = #llvm.di_subprogram -; CHECK-DAG: #[[LOC]] = loc(fused<#[[SP_OUTER]]> +; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram +; CHECK-DAG: #[[LOC]] = loc(fused<#[[SP]]> !llvm.dbg.cu = !{!1} !llvm.module.flags = !{!0} @@ -337,10 +333,10 @@ define void @class_method() { ; // ----- ; Verify the cyclic composite type is handled correctly. -; CHECK-DAG: #[[COMP_SELF:.+]] = #llvm.di_composite_type +; CHECK-DAG: #[[COMP_SELF:.+]] = #llvm.di_composite_type ; CHECK-DAG: #[[COMP_PTR_INNER:.+]] = #llvm.di_derived_type ; CHECK-DAG: #[[FIELD:.+]] = #llvm.di_derived_type -; CHECK-DAG: #[[COMP:.+]] = #llvm.di_composite_type +; CHECK-DAG: #[[COMP:.+]] = #llvm.di_composite_type ; CHECK-DAG: #[[COMP_PTR_OUTER:.+]] = #llvm.di_derived_type ; CHECK-DAG: #[[VAR0:.+]] = #llvm.di_local_variable @@ -612,9 +608,10 @@ define void @distinct_cu_func1() !dbg !5 { ; CHECK-LABEL: @declaration declare !dbg !1 void @declaration() -; CHECK: #di_subprogram = #llvm.di_subprogram< +; CHECK: #[[SP:.+]] = #llvm.di_subprogram< ; CHECK-NOT: id = distinct ; CHECK-NOT: subprogramFlags = +; CHECK: loc(fused<#[[SP]]> !llvm.module.flags = !{!0} !0 = !{i32 2, !"Debug Info Version", i32 3} @@ -635,14 +632,14 @@ declare !dbg !1 void @declaration() ; CHECK-DAG: #[[B1_INNER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B1", baseType = #[[B_SELF:.+]]> ; CHECK-DAG: #[[B2_INNER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B2", baseType = #[[B_SELF]]> -; CHECK-DAG: #[[B_INNER:.+]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID:.+]], {{.*}}name = "B", {{.*}}elements = #[[B1_INNER]], #[[B2_INNER]] +; CHECK-DAG: #[[B_INNER:.+]] = #llvm.di_composite_type ; CHECK-DAG: #[[B2_OUTER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B2", baseType = #[[B_INNER]]> -; CHECK-DAG: #[[A_OUTER:.+]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID:.+]], {{.*}}name = "A", {{.*}}elements = #[[B1_OUTER]], #[[B2_OUTER]] +; CHECK-DAG: #[[A_OUTER:.+]] = #llvm.di_composite_typeB", {{.*}}baseType = #[[B_OUTER:.+]]> -; CHECK-DAG: #[[B_OUTER]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID:.+]], {{.*}}name = "B", {{.*}}elements = #[[TO_C_INNER:.+]]> +; CHECK-DAG: #[[B_OUTER]] = #llvm.di_composite_type ; CHECK-DAG: #[[TO_C_INNER]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_INNER:.+]]> ; CHECK-DAG: #[[C_INNER]] = #llvm.di_composite_type<{{.*}}name = "C", {{.*}}elements = #[[TO_B_SELF:.+]]> ; CHECK-DAG: #[[TO_B_SELF]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_SELF:.+]]> -; CHECK-DAG: #[[B_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID]]> +; CHECK-DAG: #[[B_SELF]] = #llvm.di_composite_type ; CHECK-DAG: #[[TO_C_OUTER]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_OUTER:.+]]> ; CHECK-DAG: #[[C_OUTER]] = #llvm.di_composite_type<{{.*}}name = "C", {{.*}}elements = #[[TO_B_OUTER]]> @@ -720,23 +717,23 @@ define void @class_field(ptr %arg1) !dbg !18 { ; ^ ^ ; +-------------+ -; CHECK-DAG: #[[A:.+]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID:.+]], {{.*}}name = "A", {{.*}}elements = #[[A_TO_B:.+]], #[[A_TO_C:.+]]> +; CHECK-DAG: 
#[[A:.+]] = #llvm.di_composite_type ; CHECK-DAG: #llvm.di_subprogram<{{.*}}scope = #[[A]], ; CHECK-DAG: #[[A_TO_B]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_FROM_A:.+]]> ; CHECK-DAG: #[[A_TO_C]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_FROM_A:.+]]> -; CHECK-DAG: #[[B_FROM_A]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID:.+]], {{.*}}name = "B", {{.*}}elements = #[[B_TO_C:.+]]> +; CHECK-DAG: #[[B_FROM_A]] = #llvm.di_composite_type ; CHECK-DAG: #[[B_TO_C]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_FROM_B:.+]]> -; CHECK-DAG: #[[C_FROM_B]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID:.+]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF:.+]], #[[TO_B_SELF:.+]], #[[TO_C_SELF:.+]]> +; CHECK-DAG: #[[C_FROM_B]] = #llvm.di_composite_type -; CHECK-DAG: #[[C_FROM_A]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF]], #[[A_TO_B:.+]], #[[TO_C_SELF]] +; CHECK-DAG: #[[C_FROM_A]] = #llvm.di_composite_typeA", {{.*}}baseType = #[[A_SELF:.+]]> ; CHECK-DAG: #[[TO_B_SELF]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_SELF:.+]]> ; CHECK-DAG: #[[TO_C_SELF]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_SELF:.+]]> -; CHECK-DAG: #[[A_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID]]> -; CHECK-DAG: #[[B_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID]]> -; CHECK-DAG: #[[C_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID]]> +; CHECK-DAG: #[[A_SELF]] = #llvm.di_composite_type +; CHECK-DAG: #[[B_SELF]] = #llvm.di_composite_type +; CHECK-DAG: #[[C_SELF]] = #llvm.di_composite_type define void @class_field(ptr %arg1) !dbg !18 { ret void @@ -818,4 +815,6 @@ define void @imp_fn() !dbg !12 { !17 = !DIImportedEntity(tag: DW_TAG_imported_module, scope: !12, entity: !8, file: !3, line: 1, elements: !15) ; CHECK-DAG: #[[M:.+]] = #llvm.di_module<{{.*}}name = "mod1"{{.*}}> -; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<{{.*}}name = "imp_fn"{{.*}}retainedNodes = #llvm.di_imported_entity> +; CHECK-DAG: #[[SP_REC:.+]] = #llvm.di_subprogram, isRecSelf = true> +; CHECK-DAG: #[[IE:.+]] = #llvm.di_imported_entity +; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<{{.*}}name = "imp_fn"{{.*}}retainedNodes = #[[IE]]> diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index 7bbaa251fdfb83..c56c508b1e8bbb 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -374,23 +374,28 @@ llvm.func @fn_with_gl() { llvm.func @imp_fn() { llvm.return } loc(#loc2) -#file = #llvm.di_file<"test.f90" in ""> -#SP_TY = #llvm.di_subroutine_type -#CU = #llvm.di_compile_unit, - sourceLanguage = DW_LANG_Fortran95, file = #file, isOptimized = false, + +#di_file = #llvm.di_file<"test.f90" in ""> +#di_subroutine_type = #llvm.di_subroutine_type +#di_compile_unit = #llvm.di_compile_unit, + sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full> -#MOD = #llvm.di_module -#MOD1 = #llvm.di_module -#SP = #llvm.di_subprogram, compileUnit = #CU, scope = #file, - name = "imp_fn", file = #file, subprogramFlags = Definition, type = #SP_TY, - retainedNodes = #llvm.di_imported_entity, #llvm.di_imported_entity> +#di_module_1 = #llvm.di_module +#di_module_2 = #llvm.di_module +#di_subprogram_self_rec = #llvm.di_subprogram> +#di_imported_entity_1 = #llvm.di_imported_entity +#di_imported_entity_2 = #llvm.di_imported_entity +#di_subprogram = 
#llvm.di_subprogram, recId = distinct[1]<>, + compileUnit = #di_compile_unit, scope = #di_file, name = "imp_fn", + file = #di_file, subprogramFlags = Definition, type = #di_subroutine_type, + retainedNodes = #di_imported_entity_1, #di_imported_entity_2> #loc1 = loc("test.f90":12:14) -#loc2 = loc(fused<#SP>[#loc1]) +#loc2 = loc(fused<#di_subprogram>[#loc1]) // CHECK-DAG: ![[SP:[0-9]+]] = {{.*}}!DISubprogram(name: "imp_fn"{{.*}}retainedNodes: ![[NODES:[0-9]+]]) -// CHECK-DAG: ![[NODES]] = !{![[NODE2:[0-9]+]], ![[NODE1:[0-9]+]]} +// CHECK-DAG: ![[NODES]] = !{![[NODE1:[0-9]+]], ![[NODE2:[0-9]+]]} // CHECK-DAG: ![[NODE1]] = !DIImportedEntity(tag: DW_TAG_imported_module, scope: ![[SP]], entity: ![[MOD1:[0-9]+]]{{.*}}) // CHECK-DAG: ![[NODE2]] = !DIImportedEntity(tag: DW_TAG_imported_module, scope: ![[SP]], entity: ![[MOD2:[0-9]+]]{{.*}}) // CHECK-DAG: ![[MOD1]] = !DIModule({{.*}}name: "mod1"{{.*}}) @@ -445,7 +450,7 @@ llvm.func @func_debug_directives() { #di_compile_unit = #llvm.di_compile_unit, sourceLanguage = DW_LANG_C, file = #di_file, isOptimized = false, emissionKind = None> // Recursive type itself. -#di_struct_self = #llvm.di_composite_type> +#di_struct_self = #llvm.di_composite_type, isRecSelf = true> #di_ptr_inner = #llvm.di_derived_type #di_subroutine_inner = #llvm.di_subroutine_type #di_subprogram_inner = #llvm.di_subprogram< @@ -499,7 +504,7 @@ llvm.func @class_method() { // Ensures composite types with a recursive scope work. -#di_composite_type_self = #llvm.di_composite_type> +#di_composite_type_self = #llvm.di_composite_type, isRecSelf = true> #di_file = #llvm.di_file<"test.mlir" in "/"> #di_subroutine_type = #llvm.di_subroutine_type #di_subprogram = #llvm.di_subprogram @@ -510,7 +515,7 @@ llvm.func @class_method() { llvm.mlir.global @global_variable() {dbg_expr = #di_global_variable_expression} : !llvm.struct<()> // CHECK: distinct !DIGlobalVariable({{.*}}type: ![[COMP:[0-9]+]], -// CHECK: ![[COMP]] = distinct !DICompositeType({{.*}}scope: ![[SCOPE:[0-9]+]], +// CHECK: ![[COMP]] = distinct !DICompositeType({{.*}}scope: ![[SCOPE:[0-9]+]] // CHECK: ![[SCOPE]] = !DISubprogram({{.*}}type: ![[SUBROUTINE:[0-9]+]], // CHECK: ![[SUBROUTINE]] = !DISubroutineType(types: ![[SR_TYPES:[0-9]+]]) // CHECK: ![[SR_TYPES]] = !{![[COMP]]} @@ -522,7 +527,7 @@ llvm.mlir.global @global_variable() {dbg_expr = #di_global_variable_expression} // replaced with the recursive self reference. #di_file = #llvm.di_file<"test.mlir" in "/"> -#di_composite_type_self = #llvm.di_composite_type> +#di_composite_type_self = #llvm.di_composite_type, isRecSelf = true> #di_subroutine_type_inner = #llvm.di_subroutine_type #di_subprogram_inner = #llvm.di_subprogram @@ -542,7 +547,7 @@ llvm.mlir.global @global_variable() {dbg_expr = #di_global_variable_expression} // CHECK: distinct !DIGlobalVariable({{.*}}type: ![[VAR:[0-9]+]], // CHECK: ![[VAR]] = !DISubroutineType(types: ![[COMPS:[0-9]+]]) // CHECK: ![[COMPS]] = !{![[COMP:[0-9]+]], -// CHECK: ![[COMP]] = distinct !DICompositeType({{.*}}scope: ![[SCOPE:[0-9]+]], +// CHECK: ![[COMP]] = distinct !DICompositeType({{.*}}scope: ![[SCOPE:[0-9]+]] // CHECK: ![[SCOPE]] = !DISubprogram({{.*}}type: ![[SUBROUTINE:[0-9]+]], // CHECK: ![[SUBROUTINE]] = !DISubroutineType(types: ![[SR_TYPES:[0-9]+]]) // CHECK: ![[SR_TYPES]] = !{![[COMP]]}
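
For reference, a minimal sketch of the ArmSME op rename exercised by the test updates above; the %slice, %tile, and %idx names are illustrative only, but both forms of the syntax are taken directly from the hunks in this patch:

  // Before this patch (old op names; the insert index was a trailing operand):
  //   %t = arm_sme.move_vector_to_tile_slice %slice, %tile, %idx : vector<[4]xf32> into vector<[4]x[4]xf32>
  //   %s = arm_sme.move_tile_slice_to_vector %tile[%idx] : vector<[4]xf32> from vector<[4]x[4]xf32>
  // After (renamed to insert/extract; the insert index now sits in square brackets, matching extract):
  %t = arm_sme.insert_tile_slice %slice, %tile[%idx] : vector<[4]xf32> into vector<[4]x[4]xf32>
  %s = arm_sme.extract_tile_slice %tile[%idx] : vector<[4]xf32> from vector<[4]x[4]xf32>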