diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index b347c443da677f..b00a4b984a1d23 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -48,7 +48,6 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests" \ -D LLVM_ENABLE_LLD=ON \ -D CMAKE_CXX_FLAGS=-gmlt \ - -D BOLT_CLANG_EXE=/usr/bin/clang \ -D LLVM_CCACHE_BUILD=ON \ -D MLIR_ENABLE_BINDINGS_PYTHON=ON diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index 9cf64417d3cb2c..d608ea449f1d40 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1,3 +1,6 @@ +BOLT: + - bolt/**/* + ClangIR: - clang/include/clang/CIR/**/* - clang/lib/CIR/**/* @@ -467,6 +470,7 @@ backend:m68k: libc++: - libcxx/** + - .github/workflows/libcxx-* libc++abi: - libcxxabi/** diff --git a/bolt/include/bolt/Passes/BinaryPasses.h b/bolt/include/bolt/Passes/BinaryPasses.h index 8d89ef8b5484f8..5d7692559eda88 100644 --- a/bolt/include/bolt/Passes/BinaryPasses.h +++ b/bolt/include/bolt/Passes/BinaryPasses.h @@ -400,8 +400,7 @@ class PrintProfileStats : public BinaryFunctionPass { /// dyno stats categories. 
class PrintProgramStats : public BinaryFunctionPass { public: - explicit PrintProgramStats(const cl::opt &PrintPass) - : BinaryFunctionPass(PrintPass) {} + explicit PrintProgramStats() : BinaryFunctionPass(false) {} const char *getName() const override { return "print-stats"; } bool shouldPrint(const BinaryFunction &) const override { return false; } diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 0b2a4e86561f3a..70e324cc0165bb 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -14,6 +14,7 @@ #include "bolt/Profile/DataAggregator.h" #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryFunction.h" +#include "bolt/Passes/BinaryPasses.h" #include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/Heatmap.h" #include "bolt/Profile/YAMLProfileWriter.h" @@ -611,6 +612,7 @@ Error DataAggregator::readProfile(BinaryContext &BC) { if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile)) report_error("cannot create output data file", EC); } + BC.logBOLTErrorsAndQuitOnFatal(PrintProgramStats().runOnFunctions(BC)); } return Error::success(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index be4888ccfa5645..cbb7199a53ddd1 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -356,7 +356,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { // order they're registered. // Run this pass first to use stats for the original functions. 
- Manager.registerPass(std::make_unique(NeverPrint)); + Manager.registerPass(std::make_unique()); if (opts::PrintProfileStats) Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp index fa43b7a2f92c23..74b5ca18abce42 100644 --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -292,7 +292,7 @@ class RewriteInstanceDiff { } } } - PrintProgramStats PPS(opts::NeverPrint); + PrintProgramStats PPS; outs() << "* BOLT-DIFF: Starting print program stats pass for binary 1\n"; RI1.BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*RI1.BC)); outs() << "* BOLT-DIFF: Starting print program stats pass for binary 2\n"; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a6b2f3cc0850c3..3cf0e749f9d667 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1725,12 +1725,6 @@ void RewriteInstance::adjustFunctionBoundaries() { if (!Function.isSymbolValidInScope(Symbol, SymbolSize)) break; - // Ignore unnamed symbols. Used, for example, by debugging info on RISC-V. - if (BC->isRISCV() && cantFail(Symbol.getName()).empty()) { - ++NextSymRefI; - continue; - } - // Skip basic block labels. This happens on RISC-V with linker relaxation // enabled because every branch needs a relocation and corresponding // symbol. We don't want to add such symbols as entry points. diff --git a/bolt/test/RISCV/unnamed-sym-no-entry.c b/bolt/test/RISCV/fake-label-no-entry.c similarity index 88% rename from bolt/test/RISCV/unnamed-sym-no-entry.c rename to bolt/test/RISCV/fake-label-no-entry.c index b4173506b213ce..bd125263101bb4 100644 --- a/bolt/test/RISCV/unnamed-sym-no-entry.c +++ b/bolt/test/RISCV/fake-label-no-entry.c @@ -5,12 +5,12 @@ // RUN: %clang %cflags -g -Wl,-q -o %t %s -/// Verify that the binary indeed contains an unnamed symbol at _start +/// Verify that the binary indeed contains a fake label ".L0 " at _start. 
// RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=CHECK-ELF // CHECK-ELF-DAG: [[#%x,START:]] {{.*}} FUNC GLOBAL DEFAULT [[#%d,SECTION:]] _start{{$}} // CHECK-ELF-DAG: [[#%x,START]] {{.*}} NOTYPE LOCAL DEFAULT [[#SECTION]] .L0 {{$}} -/// Verify that BOLT did not create an extra entry point for the unnamed symbol +/// Verify that BOLT did not create an extra entry point for the fake label. // RUN: llvm-bolt -o %t.bolt %t --print-cfg | FileCheck %s // CHECK: Binary Function "_start" after building cfg { // CHECK: IsMultiEntry: 0 diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index e8c3f64239a27d..0bd44720f1b7a1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -11,7 +11,14 @@ REQUIRES: system-linux RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ -RUN: --profile-use-dfs +RUN: --profile-use-dfs | FileCheck %s + +RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s + +CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile + RUN: cat %t | sort | FileCheck %s -check-prefix=PERF2BOLT RUN: cat %t.new | FileCheck %s -check-prefix=NEWFORMAT diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index 6852db6c2ee311..8005d6e91c060c 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -16,6 +16,7 @@ add_clang_library(clangTidyModernizeModule MakeSharedCheck.cpp MakeSmartPtrCheck.cpp MakeUniqueCheck.cpp + MinMaxUseInitializerListCheck.cpp ModernizeTidyModule.cpp PassByValueCheck.cpp RawStringLiteralCheck.cpp diff --git 
a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp new file mode 100644 index 00000000000000..45f7700463d570 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.cpp @@ -0,0 +1,271 @@ +//===--- MinMaxUseInitializerListCheck.cpp - clang-tidy -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MinMaxUseInitializerListCheck.h" +#include "../utils/ASTUtils.h" +#include "../utils/LexerUtils.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Lex/Lexer.h" + +using namespace clang; + +namespace { + +struct FindArgsResult { + const Expr *First; + const Expr *Last; + const Expr *Compare; + SmallVector Args; +}; + +} // anonymous namespace + +using namespace clang::ast_matchers; + +namespace clang::tidy::modernize { + +static FindArgsResult findArgs(const CallExpr *Call) { + FindArgsResult Result; + Result.First = nullptr; + Result.Last = nullptr; + Result.Compare = nullptr; + + // check if the function has initializer list argument + if (Call->getNumArgs() < 3) { + auto ArgIterator = Call->arguments().begin(); + + const auto *InitListExpr = + dyn_cast(*ArgIterator); + const auto *InitList = + InitListExpr != nullptr + ? 
dyn_cast( + InitListExpr->getSubExpr()->IgnoreImplicit()) + : nullptr; + + if (InitList) { + Result.Args.append(InitList->inits().begin(), InitList->inits().end()); + Result.First = *ArgIterator; + Result.Last = *ArgIterator; + + // check if there is a comparison argument + std::advance(ArgIterator, 1); + if (ArgIterator != Call->arguments().end()) + Result.Compare = *ArgIterator; + + return Result; + } + Result.Args = SmallVector(Call->arguments()); + } else { + // if it has 3 arguments then the last will be the comparison + Result.Compare = *(std::next(Call->arguments().begin(), 2)); + Result.Args = SmallVector(llvm::drop_end(Call->arguments())); + } + Result.First = Result.Args.front(); + Result.Last = Result.Args.back(); + + return Result; +} + +static SmallVector +generateReplacements(const MatchFinder::MatchResult &Match, + const CallExpr *TopCall, const FindArgsResult &Result, + const bool IgnoreNonTrivialTypes, + const std::uint64_t IgnoreTrivialTypesOfSizeAbove) { + SmallVector FixItHints; + const SourceManager &SourceMngr = *Match.SourceManager; + const LangOptions &LanguageOpts = Match.Context->getLangOpts(); + + const QualType ResultType = TopCall->getDirectCallee() + ->getReturnType() + .getCanonicalType() + .getNonReferenceType() + .getUnqualifiedType(); + + // check if the type is trivial + const bool IsResultTypeTrivial = ResultType.isTrivialType(*Match.Context); + + if ((!IsResultTypeTrivial && IgnoreNonTrivialTypes)) + return FixItHints; + + if (IsResultTypeTrivial && + static_cast( + Match.Context->getTypeSizeInChars(ResultType).getQuantity()) > + IgnoreTrivialTypesOfSizeAbove) + return FixItHints; + + for (const Expr *Arg : Result.Args) { + const auto *InnerCall = dyn_cast(Arg->IgnoreParenImpCasts()); + + // If the argument is not a nested call + if (!InnerCall) { + // check if typecast is required + const QualType ArgType = Arg->IgnoreParenImpCasts() + ->getType() + .getCanonicalType() + .getUnqualifiedType(); + + if (ArgType == ResultType) + 
continue; + + const StringRef ArgText = Lexer::getSourceText( + CharSourceRange::getTokenRange(Arg->getSourceRange()), SourceMngr, + LanguageOpts); + + const auto Replacement = Twine("static_cast<") + .concat(ResultType.getAsString(LanguageOpts)) + .concat(">(") + .concat(ArgText) + .concat(")") + .str(); + + FixItHints.push_back( + FixItHint::CreateReplacement(Arg->getSourceRange(), Replacement)); + continue; + } + + const FindArgsResult InnerResult = findArgs(InnerCall); + + // if the nested call doesn't have arguments skip it + if (!InnerResult.First || !InnerResult.Last) + continue; + + // if the nested call is not the same as the top call + if (InnerCall->getDirectCallee()->getQualifiedNameAsString() != + TopCall->getDirectCallee()->getQualifiedNameAsString()) + continue; + + // if the nested call doesn't have the same compare function + if ((Result.Compare || InnerResult.Compare) && + !utils::areStatementsIdentical(Result.Compare, InnerResult.Compare, + *Match.Context)) + continue; + + // remove the function call + FixItHints.push_back( + FixItHint::CreateRemoval(InnerCall->getCallee()->getSourceRange())); + + // remove the parentheses + const auto LParen = utils::lexer::findNextTokenSkippingComments( + InnerCall->getCallee()->getEndLoc(), SourceMngr, LanguageOpts); + if (LParen.has_value() && LParen->is(tok::l_paren)) + FixItHints.push_back( + FixItHint::CreateRemoval(SourceRange(LParen->getLocation()))); + FixItHints.push_back( + FixItHint::CreateRemoval(SourceRange(InnerCall->getRParenLoc()))); + + // if the inner call has an initializer list arg + if (InnerResult.First == InnerResult.Last) { + // remove the initializer list braces + FixItHints.push_back(FixItHint::CreateRemoval( + CharSourceRange::getTokenRange(InnerResult.First->getBeginLoc()))); + FixItHints.push_back(FixItHint::CreateRemoval( + CharSourceRange::getTokenRange(InnerResult.First->getEndLoc()))); + } + + const SmallVector InnerReplacements = generateReplacements( + Match, InnerCall, 
InnerResult, IgnoreNonTrivialTypes, + IgnoreTrivialTypesOfSizeAbove); + + FixItHints.append(InnerReplacements); + + if (InnerResult.Compare) { + // find the comma after the value arguments + const auto Comma = utils::lexer::findNextTokenSkippingComments( + InnerResult.Last->getEndLoc(), SourceMngr, LanguageOpts); + + // remove the comma and the comparison + if (Comma.has_value() && Comma->is(tok::comma)) + FixItHints.push_back( + FixItHint::CreateRemoval(SourceRange(Comma->getLocation()))); + + FixItHints.push_back( + FixItHint::CreateRemoval(InnerResult.Compare->getSourceRange())); + } + } + + return FixItHints; +} + +MinMaxUseInitializerListCheck::MinMaxUseInitializerListCheck( + StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + IgnoreNonTrivialTypes(Options.get("IgnoreNonTrivialTypes", true)), + IgnoreTrivialTypesOfSizeAbove( + Options.get("IgnoreTrivialTypesOfSizeAbove", 32L)), + Inserter(Options.getLocalOrGlobal("IncludeStyle", + utils::IncludeSorter::IS_LLVM), + areDiagsSelfContained()) {} + +void MinMaxUseInitializerListCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "IgnoreNonTrivialTypes", IgnoreNonTrivialTypes); + Options.store(Opts, "IgnoreTrivialTypesOfSizeAbove", + IgnoreTrivialTypesOfSizeAbove); + Options.store(Opts, "IncludeStyle", Inserter.getStyle()); +} + +void MinMaxUseInitializerListCheck::registerMatchers(MatchFinder *Finder) { + auto CreateMatcher = [](const StringRef FunctionName) { + auto FuncDecl = functionDecl(hasName(FunctionName)); + auto Expression = callExpr(callee(FuncDecl)); + + return callExpr(callee(FuncDecl), + anyOf(hasArgument(0, Expression), + hasArgument(1, Expression), + hasArgument(0, cxxStdInitializerListExpr())), + unless(hasParent(Expression))) + .bind("topCall"); + }; + + Finder->addMatcher(CreateMatcher("::std::max"), this); + Finder->addMatcher(CreateMatcher("::std::min"), this); +} + +void MinMaxUseInitializerListCheck::registerPPCallbacks( + const 
SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { + Inserter.registerPreprocessor(PP); +} + +void MinMaxUseInitializerListCheck::check( + const MatchFinder::MatchResult &Match) { + + const auto *TopCall = Match.Nodes.getNodeAs("topCall"); + + const FindArgsResult Result = findArgs(TopCall); + const SmallVector Replacements = + generateReplacements(Match, TopCall, Result, IgnoreNonTrivialTypes, + IgnoreTrivialTypesOfSizeAbove); + + if (Replacements.empty()) + return; + + const DiagnosticBuilder Diagnostic = + diag(TopCall->getBeginLoc(), + "do not use nested 'std::%0' calls, use an initializer list instead") + << TopCall->getDirectCallee()->getName() + << Inserter.createIncludeInsertion( + Match.SourceManager->getFileID(TopCall->getBeginLoc()), + ""); + + // if the top call doesn't have an initializer list argument + if (Result.First != Result.Last) { + // add { and } insertions + Diagnostic << FixItHint::CreateInsertion(Result.First->getBeginLoc(), "{"); + + Diagnostic << FixItHint::CreateInsertion( + Lexer::getLocForEndOfToken(Result.Last->getEndLoc(), 0, + *Match.SourceManager, + Match.Context->getLangOpts()), + "}"); + } + + Diagnostic << Replacements; +} + +} // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h new file mode 100644 index 00000000000000..577d1265307612 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/MinMaxUseInitializerListCheck.h @@ -0,0 +1,56 @@ +//===--- MinMaxUseInitializerListCheck.h - clang-tidy -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MINMAXUSEINITIALIZERLISTCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MINMAXUSEINITIALIZERLISTCHECK_H + +#include "../ClangTidyCheck.h" +#include "../utils/IncludeInserter.h" + +namespace clang::tidy::modernize { + +/// Replaces nested ``std::min`` and ``std::max`` calls with an initializer list +/// where applicable. +/// +/// For example: +/// +/// \code +/// int a = std::max(std::max(i, j), k); +/// \endcode +/// +/// This code is transformed to: +/// +/// \code +/// int a = std::max({i, j, k}); +/// \endcode +class MinMaxUseInitializerListCheck : public ClangTidyCheck { +public: + MinMaxUseInitializerListCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, + Preprocessor *ModuleExpanderPP) override; + void check(const ast_matchers::MatchFinder::MatchResult &Match) override; + + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus11; + } + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } + +private: + bool IgnoreNonTrivialTypes; + std::uint64_t IgnoreTrivialTypesOfSizeAbove; + utils::IncludeInserter Inserter; +}; + +} // namespace clang::tidy::modernize + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_MINMAXUSEINITIALIZERLISTCHECK_H diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index e96cf274f58cfe..776558433c5baa 100644 --- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ 
b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -18,6 +18,7 @@ #include "MacroToEnumCheck.h" #include "MakeSharedCheck.h" #include "MakeUniqueCheck.h" +#include "MinMaxUseInitializerListCheck.h" #include "PassByValueCheck.h" #include "RawStringLiteralCheck.h" #include "RedundantVoidArgCheck.h" @@ -68,6 +69,8 @@ class ModernizeModule : public ClangTidyModule { CheckFactories.registerCheck("modernize-macro-to-enum"); CheckFactories.registerCheck("modernize-make-shared"); CheckFactories.registerCheck("modernize-make-unique"); + CheckFactories.registerCheck( + "modernize-min-max-use-initializer-list"); CheckFactories.registerCheck("modernize-pass-by-value"); CheckFactories.registerCheck( "modernize-use-designated-initializers"); diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index dd772d69202548..41065fc8e87859 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -28,6 +28,7 @@ add_clang_library(clangTidyReadabilityModule IsolateDeclarationCheck.cpp MagicNumbersCheck.cpp MakeMemberFunctionConstCheck.cpp + MathMissingParenthesesCheck.cpp MisleadingIndentationCheck.cpp MisplacedArrayIndexCheck.cpp NamedParameterCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp new file mode 100644 index 00000000000000..d1e20b9074cec1 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.cpp @@ -0,0 +1,97 @@ +//===--- MathMissingParenthesesCheck.cpp - clang-tidy ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MathMissingParenthesesCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::readability { + +void MathMissingParenthesesCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(binaryOperator(unless(hasParent(binaryOperator())), + unless(isAssignmentOperator()), + unless(isComparisonOperator()), + unless(hasAnyOperatorName("&&", "||")), + hasDescendant(binaryOperator())) + .bind("binOp"), + this); +} + +static int getPrecedence(const BinaryOperator *BinOp) { + if (!BinOp) + return 0; + switch (BinOp->getOpcode()) { + case BO_Mul: + case BO_Div: + case BO_Rem: + return 5; + case BO_Add: + case BO_Sub: + return 4; + case BO_And: + return 3; + case BO_Xor: + return 2; + case BO_Or: + return 1; + default: + return 0; + } +} +static void addParantheses(const BinaryOperator *BinOp, + const BinaryOperator *ParentBinOp, + ClangTidyCheck *Check, + const clang::SourceManager &SM, + const clang::LangOptions &LangOpts) { + if (!BinOp) + return; + + int Precedence1 = getPrecedence(BinOp); + int Precedence2 = getPrecedence(ParentBinOp); + + if (ParentBinOp != nullptr && Precedence1 != Precedence2) { + const clang::SourceLocation StartLoc = BinOp->getBeginLoc(); + const clang::SourceLocation EndLoc = + clang::Lexer::getLocForEndOfToken(BinOp->getEndLoc(), 0, SM, LangOpts); + if (EndLoc.isInvalid()) + return; + + Check->diag(StartLoc, + "'%0' has higher precedence than '%1'; add parentheses to " + "explicitly specify the order of operations") + << (Precedence1 > Precedence2 ? BinOp->getOpcodeStr() + : ParentBinOp->getOpcodeStr()) + << (Precedence1 > Precedence2 ? 
ParentBinOp->getOpcodeStr() + : BinOp->getOpcodeStr()) + << FixItHint::CreateInsertion(StartLoc, "(") + << FixItHint::CreateInsertion(EndLoc, ")") + << SourceRange(StartLoc, EndLoc); + } + + addParantheses(dyn_cast(BinOp->getLHS()->IgnoreImpCasts()), + BinOp, Check, SM, LangOpts); + addParantheses(dyn_cast(BinOp->getRHS()->IgnoreImpCasts()), + BinOp, Check, SM, LangOpts); +} + +void MathMissingParenthesesCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *BinOp = Result.Nodes.getNodeAs("binOp"); + std::vector< + std::pair>> + Insertions; + const SourceManager &SM = *Result.SourceManager; + const clang::LangOptions &LO = Result.Context->getLangOpts(); + addParantheses(BinOp, nullptr, this, SM, LO); +} + +} // namespace clang::tidy::readability diff --git a/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h new file mode 100644 index 00000000000000..9a9d2b3cfaabae --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/MathMissingParenthesesCheck.h @@ -0,0 +1,34 @@ +//===--- MathMissingParenthesesCheck.h - clang-tidy -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MATHMISSINGPARENTHESESCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MATHMISSINGPARENTHESESCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::readability { + +/// Check for missing parentheses in mathematical expressions that involve +/// operators of different priorities. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/readability/math-missing-parentheses.html +class MathMissingParenthesesCheck : public ClangTidyCheck { +public: + MathMissingParenthesesCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } +}; + +} // namespace clang::tidy::readability + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_MATHMISSINGPARENTHESESCHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 376b84683df74e..d61c0ba39658e5 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -32,6 +32,7 @@ #include "IsolateDeclarationCheck.h" #include "MagicNumbersCheck.h" #include "MakeMemberFunctionConstCheck.h" +#include "MathMissingParenthesesCheck.h" #include "MisleadingIndentationCheck.h" #include "MisplacedArrayIndexCheck.h" #include "NamedParameterCheck.h" @@ -105,6 +106,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-identifier-naming"); CheckFactories.registerCheck( "readability-implicit-bool-conversion"); + CheckFactories.registerCheck( + "readability-math-missing-parentheses"); CheckFactories.registerCheck( "readability-redundant-inline-specifier"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.cpp b/clang-tools-extra/clangd/CodeCompletionStrings.cpp index 2075e5965f181e..9b4442b0bb76fd 100644 --- a/clang-tools-extra/clangd/CodeCompletionStrings.cpp +++ b/clang-tools-extra/clangd/CodeCompletionStrings.cpp @@ -253,7 +253,7 @@ void getSignature(const 
CodeCompletionString &CCS, std::string *Signature, if (!IncludeFunctionArguments && ResultKind == CodeCompletionResult::RK_Declaration) TruncateSnippetAt.emplace(Snippet->size()); - LLVM_FALLTHROUGH; + [[fallthrough]]; case CodeCompletionString::CK_RightParen: case CodeCompletionString::CK_LeftBracket: case CodeCompletionString::CK_RightBracket: diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 5b1feffb89ea06..2867fc95803048 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -131,6 +131,12 @@ New checks to reading out-of-bounds data due to inadequate or incorrect string null termination. +- New :doc:`modernize-min-max-use-initializer-list + ` check. + + Replaces nested ``std::min`` and ``std::max`` calls with an initializer list + where applicable. + - New :doc:`modernize-use-designated-initializers ` check. @@ -143,6 +149,12 @@ New checks Enforces consistent style for enumerators' initialization, covering three styles: none, first only, or all initialized explicitly. +- New :doc:`readability-math-missing-parentheses + ` check. + + Check for missing parentheses in mathematical expressions that involve + operators of different priorities. + - New :doc:`readability-use-std-min-max ` check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 5d9d487f75f9cb..49747ff896ba5c 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -276,6 +276,7 @@ Clang-Tidy Checks :doc:`modernize-macro-to-enum `, "Yes" :doc:`modernize-make-shared `, "Yes" :doc:`modernize-make-unique `, "Yes" + :doc:`modernize-min-max-use-initializer-list `, "Yes" :doc:`modernize-pass-by-value `, "Yes" :doc:`modernize-raw-string-literal `, "Yes" :doc:`modernize-redundant-void-arg `, "Yes" @@ -363,6 +364,7 @@ Clang-Tidy Checks :doc:`readability-isolate-declaration `, "Yes" :doc:`readability-magic-numbers `, :doc:`readability-make-member-function-const `, "Yes" + :doc:`readability-math-missing-parentheses `, "Yes" :doc:`readability-misleading-indentation `, :doc:`readability-misplaced-array-index `, "Yes" :doc:`readability-named-parameter `, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/min-max-use-initializer-list.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/min-max-use-initializer-list.rst new file mode 100644 index 00000000000000..d6721a25629b05 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/min-max-use-initializer-list.rst @@ -0,0 +1,50 @@ +.. title:: clang-tidy - modernize-min-max-use-initializer-list + +modernize-min-max-use-initializer-list +====================================== + +Replaces nested ``std::min`` and ``std::max`` calls with an initializer list +where applicable. + +For instance, consider the following code: + +.. code-block:: cpp + + int a = std::max(std::max(i, j), k); + +The check will transform the above code to: + +.. 
code-block:: cpp + + int a = std::max({i, j, k}); + +Performance Considerations +========================== + +While this check simplifies the code and makes it more readable, it may cause +performance degradation for non-trivial types due to the need to copy objects +into the initializer list. + +To avoid this, it is recommended to use `std::ref` or `std::cref` for +non-trivial types: + +.. code-block:: cpp + + std::string b = std::max({std::ref(i), std::ref(j), std::ref(k)}); + +Options +======= + +.. option:: IncludeStyle + + A string specifying which include-style is used, `llvm` or `google`. Default + is `llvm`. + +.. option:: IgnoreNonTrivialTypes + + A boolean specifying whether to ignore non-trivial types. Default is `true`. + +.. option:: IgnoreTrivialTypesOfSizeAbove + + An integer specifying the size (in bytes) above which trivial types are + ignored. Default is `32`. \ No newline at end of file diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/math-missing-parentheses.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/math-missing-parentheses.rst new file mode 100644 index 00000000000000..21d66daab334c6 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/math-missing-parentheses.rst @@ -0,0 +1,27 @@ +.. title:: clang-tidy - readability-math-missing-parentheses + +readability-math-missing-parentheses +==================================== + +Check for missing parentheses in mathematical expressions that involve operators +of different priorities. + +Parentheses in mathematical expressions clarify the order +of operations, especially with different-priority operators. Lengthy or multiline +expressions can obscure this order, leading to coding errors. IDEs can aid clarity +by highlighting parentheses. Explicitly using parentheses also clarifies what the +developer had in mind when writing the expression. Ensuring their presence reduces +ambiguity and errors, promoting clearer and more maintainable code. 
+ +Before: + +.. code-block:: c++ + + int x = 1 + 2 * 3 - 4 / 5; + + +After: + +.. code-block:: c++ + + int x = 1 + (2 * 3) - (4 / 5); \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/min-max-use-initializer-list.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/min-max-use-initializer-list.cpp new file mode 100644 index 00000000000000..51ab9bda975f10 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/min-max-use-initializer-list.cpp @@ -0,0 +1,305 @@ +// RUN: %check_clang_tidy %s modernize-min-max-use-initializer-list %t + +// CHECK-FIXES: #include +namespace utils { +template +T max(T a, T b) { + return (a < b) ? b : a; +} +} // namespace utils + +namespace std { +template< class T > +struct initializer_list { + initializer_list()=default; + initializer_list(T*,int){} + const T* begin() const {return nullptr;} + const T* end() const {return nullptr;} +}; + +template +ForwardIt min_element(ForwardIt first, ForwardIt last) +{ + if (first == last) + return last; + + ForwardIt smallest = first; + + while (++first != last) + if (*first < *smallest) + smallest = first; + + return smallest; +} + +template +ForwardIt min_element(ForwardIt first, ForwardIt last, Compare comp) +{ + if (first == last) + return last; + + ForwardIt smallest = first; + + while (++first != last) + if (comp(*first, *smallest)) + smallest = first; + + return smallest; +} + +template +ForwardIt max_element(ForwardIt first, ForwardIt last) +{ + if (first == last) + return last; + + ForwardIt largest = first; + + while (++first != last) + if (*largest < *first) + largest = first; + + return largest; +} + +template +ForwardIt max_element(ForwardIt first, ForwardIt last, Compare comp) +{ + if (first == last) + return last; + + ForwardIt largest = first; + + while(++first != last) + if (comp(*largest, *first)) + largest = first; + + return largest; +} + +template< class T > +const T& max( const T& a, const T& b ) { + return (a 
< b) ? b : a; +}; + +template< class T > +T max(std::initializer_list ilist) +{ + return *std::max_element(ilist.begin(), ilist.end()); +} + +template< class T, class Compare > +const T& max( const T& a, const T& b, Compare comp ) { + return (comp(a, b)) ? b : a; +}; + +template< class T, class Compare > +T max(std::initializer_list ilist, Compare comp) { + return *std::max_element(ilist.begin(), ilist.end(), comp); +}; + +template< class T > +const T& min( const T& a, const T& b ) { + return (b < a) ? b : a; +}; + +template< class T > +T min(std::initializer_list ilist) +{ + return *std::min_element(ilist.begin(), ilist.end()); +} + + +template< class T, class Compare > +const T& min( const T& a, const T& b, Compare comp ) { + return (comp(b, a)) ? b : a; +}; + +template< class T, class Compare > +T min(std::initializer_list ilist, Compare comp) { + return *std::min_element(ilist.begin(), ilist.end(), comp); +}; + +} // namespace std + +using namespace std; + +namespace { +bool fless_than(int a, int b) { +return a < b; +} + +bool fgreater_than(int a, int b) { +return a > b; +} +auto less_than = [](int a, int b) { return a < b; }; +auto greater_than = [](int a, int b) { return a > b; }; + +int max1 = std::max(1, std::max(2, 3)); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max1 = std::max({1, 2, 3}); + +int min1 = std::min(1, std::min(2, 3)); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min1 = std::min({1, 2, 3}); + +int max2 = std::max(1, std::max(2, std::max(3, 4))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2 = std::max({1, 2, 3, 4}); + +int max2b = 
std::max(std::max(std::max(1, 2), std::max(3, 4)), std::max(std::max(5, 6), std::max(7, 8))); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2b = std::max({1, 2, 3, 4, 5, 6, 7, 8}); + +int max2c = std::max(std::max(1, std::max(2, 3)), 4); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2c = std::max({1, 2, 3, 4}); + +int max2d = std::max(std::max({1, 2, 3}), 4); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2d = std::max({1, 2, 3, 4}); + + +int max2e = std::max(1, max(2, max(3, 4))); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max2e = std::max({1, 2, 3, 4}); + +int min2 = std::min(1, std::min(2, std::min(3, 4))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min2 = std::min({1, 2, 3, 4}); + +int max3 = std::max(std::max(4, 5), std::min(2, std::min(3, 1))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-MESSAGES: :[[@LINE-2]]:37: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max3 = std::max({4, 5, std::min({2, 3, 1})}); + +int min3 = std::min(std::min(4, 5), std::max(2, std::max(3, 1))); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead 
[modernize-min-max-use-initializer-list] +// CHECK-MESSAGES: :[[@LINE-2]]:37: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min3 = std::min({4, 5, std::max({2, 3, 1})}); + +int max4 = std::max(1, std::max(2, 3, greater_than), less_than); +// CHECK-FIXES: int max4 = std::max(1, std::max(2, 3, greater_than), less_than); + +int min4 = std::min(1, std::min(2, 3, greater_than), less_than); +// CHECK-FIXES: int min4 = std::min(1, std::min(2, 3, greater_than), less_than); + +int max5 = std::max(1, std::max(2, 3), less_than); +// CHECK-FIXES: int max5 = std::max(1, std::max(2, 3), less_than); + +int min5 = std::min(1, std::min(2, 3), less_than); +// CHECK-FIXES: int min5 = std::min(1, std::min(2, 3), less_than); + +int max6 = std::max(1, std::max(2, 3, greater_than), greater_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max6 = std::max({1, 2, 3 }, greater_than); + +int min6 = std::min(1, std::min(2, 3, greater_than), greater_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min6 = std::min({1, 2, 3 }, greater_than); + +int max7 = std::max(1, std::max(2, 3, fless_than), fgreater_than); +// CHECK-FIXES: int max7 = std::max(1, std::max(2, 3, fless_than), fgreater_than); + +int min7 = std::min(1, std::min(2, 3, fless_than), fgreater_than); +// CHECK-FIXES: int min7 = std::min(1, std::min(2, 3, fless_than), fgreater_than); + +int max8 = std::max(1, std::max(2, 3, fless_than), less_than); +// CHECK-FIXES: int max8 = std::max(1, std::max(2, 3, fless_than), less_than) + +int min8 = std::min(1, std::min(2, 3, fless_than), less_than); +// CHECK-FIXES: int min8 = std::min(1, std::min(2, 3, fless_than), less_than); + +int max9 = 
std::max(1, std::max(2, 3, fless_than), fless_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max9 = std::max({1, 2, 3 }, fless_than); + +int min9 = std::min(1, std::min(2, 3, fless_than), fless_than); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min9 = std::min({1, 2, 3 }, fless_than); + +int min10 = std::min(std::min(4, 5), std::max(2, utils::max(3, 1))); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::min' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int min10 = std::min({4, 5, std::max(2, utils::max(3, 1))}); + +int max10 = std::max({std::max(1, 2), std::max({5, 6, 1}), 2, std::min({1, 2, 4})}); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int max10 = std::max({1, 2, 5, 6, 1, 2, std::min({1, 2, 4})}); + +int typecastTest = std::max(std::max(0U, 0.0f), 0); +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int typecastTest = std::max({static_cast(0U), static_cast(0.0f), 0}); + +int typecastTest1 = std::max(std::max(0U, 0.0f), 0L); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int typecastTest1 = std::max({static_cast(0U), static_cast(0.0f), 0L}); + +int typecastTest2 = std::max(std::max(10U, 20.0f), 30); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// 
CHECK-FIXES: int typecastTest2 = std::max({static_cast(10U), static_cast(20.0f), 30}); + +int typecastTest3 = std::max(std::max(0U, std::max(0.0f, 1.0f)), 0); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int typecastTest3 = std::max({static_cast(0U), static_cast(0.0f), static_cast(1.0f), 0}); + +#define max3f(a, b, c) std::max(a, std::max(b, c)) +// CHECK-FIXES: #define max3f(a, b, c) std::max(a, std::max(b, c)) + +#define value 4545 +int macroVarMax = std::max(value, std::max(1, 2)); +// CHECK-MESSAGES: :[[@LINE-1]]:19: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int macroVarMax = std::max({value, 1, 2}); + +#define value2 45U +int macroVarMax2 = std::max(1, std::max(value2, 2.0f)); +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: do not use nested 'std::max' calls, use an initializer list instead [modernize-min-max-use-initializer-list] +// CHECK-FIXES: int macroVarMax2 = std::max({1, static_cast(value2), static_cast(2.0f)}); + +// True-negative tests +int maxTN1 = std::max(1, 2); +// CHECK-FIXES: int maxTN1 = std::max(1, 2); + +int maxTN2 = std::max({1, 2, 3}); +// CHECK-FIXES: int maxTN2 = std::max({1, 2, 3}); + +int maxTN3 = std::max({1, 2, 3}, less_than); +// CHECK-FIXES: int maxTN3 = std::max({1, 2, 3}, less_than); + +// non-trivial types +struct A { + int a; + A(int a) : a(a) {} + bool operator<(const A &rhs) const { return a < rhs.a; } +}; + +A maxNT1 = std::max(A(1), A(2)); +// CHECK-FIXES: A maxNT1 = std::max(A(1), A(2)); + +A maxNT2 = std::max(A(1), std::max(A(2), A(3))); +// CHECK-FIXES: A maxNT2 = std::max(A(1), std::max(A(2), A(3))); + +A maxNT3 = std::max(A(1), std::max(A(2), A(3)), [](const A &lhs, const A &rhs) { return lhs.a < rhs.a; }); +// CHECK-FIXES: A maxNT3 = std::max(A(1), std::max(A(2), A(3)), [](const A &lhs, const A &rhs) { 
return lhs.a < rhs.a; }); + +// Trivial type with size greater than 32 +struct B { + // 9*4 = 36 bytes > 32 bytes + int a[9]; + + bool operator<(const B& rhs) const { + return a[0] < rhs.a[0]; + } +}; + +B maxTT1 = std::max(B(), B()); +// CHECK-FIXES: B maxTT1 = std::max(B(), B()); + +B maxTT2 = std::max(B(), std::max(B(), B())); +// CHECK-FIXES: B maxTT2 = std::max(B(), std::max(B(), B())); + +B maxTT3 = std::max(B(), std::max(B(), B()), [](const B &lhs, const B &rhs) { return lhs.a[0] < rhs.a[0]; }); +// CHECK-FIXES: B maxTT3 = std::max(B(), std::max(B(), B()), [](const B &lhs, const B &rhs) { return lhs.a[0] < rhs.a[0]; }); + + +} // namespace + diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp new file mode 100644 index 00000000000000..edbe2e1c37c770 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/math-missing-parentheses.cpp @@ -0,0 +1,120 @@ +// RUN: %check_clang_tidy %s readability-math-missing-parentheses %t + +#define MACRO_AND & +#define MACRO_ADD + +#define MACRO_OR | +#define MACRO_MULTIPLY * +#define MACRO_XOR ^ +#define MACRO_SUBTRACT - +#define MACRO_DIVIDE / + +int foo(){ + return 5; +} + +int bar(){ + return 4; +} + +class fun{ +public: + int A; + double B; + fun(){ + A = 5; + B = 5.4; + } +}; + +void f(){ + //CHECK-MESSAGES: :[[@LINE+2]]:17: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int a = 1 + (2 * 3); + int a = 1 + 2 * 3; + + int a_negative = 1 + (2 * 3); // No warning + + int b = 1 + 2 + 3; // No warning + + int c = 1 * 2 * 3; // No warning + + //CHECK-MESSAGES: :[[@LINE+3]]:17: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:25: warning: 
'/' has higher precedence than '-'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int d = 1 + (2 * 3) - (4 / 5); + int d = 1 + 2 * 3 - 4 / 5; + + int d_negative = 1 + (2 * 3) - (4 / 5); // No warning + + //CHECK-MESSAGES: :[[@LINE+4]]:13: warning: '&' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+3]]:17: warning: '+' has higher precedence than '&'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:25: warning: '*' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int e = (1 & (2 + 3)) | (4 * 5); + int e = 1 & 2 + 3 | 4 * 5; + + int e_negative = (1 & (2 + 3)) | (4 * 5); // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int f = (1 * -2) + 4; + int f = 1 * -2 + 4; + + int f_negative = (1 * -2) + 4; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int g = (1 * 2 * 3) + 4 + 5; + int g = 1 * 2 * 3 + 4 + 5; + + int g_negative = (1 * 2 * 3) + 4 + 5; // No warning + + //CHECK-MESSAGES: :[[@LINE+4]]:13: warning: '&' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+3]]:19: warning: '+' has higher precedence than '&'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:27: warning: '*' has 
higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int h = (120 & (2 + 3)) | (22 * 5); + int h = 120 & 2 + 3 | 22 * 5; + + int h_negative = (120 & (2 + 3)) | (22 * 5); // No warning + + int i = 1 & 2 & 3; // No warning + + int j = 1 | 2 | 3; // No warning + + int k = 1 ^ 2 ^ 3; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '+' has higher precedence than '^'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int l = (1 + 2) ^ 3; + int l = 1 + 2 ^ 3; + + int l_negative = (1 + 2) ^ 3; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int m = (2 * foo()) + bar(); + int m = 2 * foo() + bar(); + + int m_negative = (2 * foo()) + bar(); // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:13: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int n = (1.05 * foo()) + double(bar()); + int n = 1.05 * foo() + double(bar()); + + int n_negative = (1.05 * foo()) + double(bar()); // No warning + + //CHECK-MESSAGES: :[[@LINE+3]]:17: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int o = 1 + (obj.A * 3) + obj.B; + fun obj; + int o = 1 + obj.A * 3 + obj.B; + + int o_negative = 1 + (obj.A * 3) + obj.B; // No warning + + //CHECK-MESSAGES: :[[@LINE+2]]:18: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int p = 1U + (2 * 3); + int p = 1U + 2 * 3; + + int p_negative = 1U + (2 * 3); // No warning + + 
//CHECK-MESSAGES: :[[@LINE+7]]:13: warning: '+' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+6]]:25: warning: '*' has higher precedence than '+'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+5]]:53: warning: '&' has higher precedence than '^'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+4]]:53: warning: '^' has higher precedence than '|'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+3]]:77: warning: '-' has higher precedence than '^'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-MESSAGES: :[[@LINE+2]]:94: warning: '/' has higher precedence than '-'; add parentheses to explicitly specify the order of operations [readability-math-missing-parentheses] + //CHECK-FIXES: int q = (1 MACRO_ADD (2 MACRO_MULTIPLY 3)) MACRO_OR ((4 MACRO_AND 5) MACRO_XOR (6 MACRO_SUBTRACT (7 MACRO_DIVIDE 8))); + int q = 1 MACRO_ADD 2 MACRO_MULTIPLY 3 MACRO_OR 4 MACRO_AND 5 MACRO_XOR 6 MACRO_SUBTRACT 7 MACRO_DIVIDE 8; // No warning +} diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake index fa972636553f1f..c164d5497275f3 100644 --- a/clang/cmake/caches/Release.cmake +++ b/clang/cmake/caches/Release.cmake @@ -1,95 +1,93 @@ # Plain options configure the first build. # BOOTSTRAP_* options configure the second build. # BOOTSTRAP_BOOTSTRAP_* options configure the third build. 
+# PGO Builds have 3 stages (stage1, stage2-instrumented, stage2) +# non-PGO Builds have 2 stages (stage1, stage2) -# General Options + +function (set_final_stage_var name value type) + if (LLVM_RELEASE_ENABLE_PGO) + set(BOOTSTRAP_BOOTSTRAP_${name} ${value} CACHE ${type} "") + else() + set(BOOTSTRAP_${name} ${value} CACHE ${type} "") + endif() +endfunction() + +function (set_instrument_and_final_stage_var name value type) + # This sets the variable for the final stage in non-PGO builds and in + # the stage2-instrumented stage for PGO builds. + set(BOOTSTRAP_${name} ${value} CACHE ${type} "") + if (LLVM_RELEASE_ENABLE_PGO) + # Set the variable in the final stage for PGO builds. + set(BOOTSTRAP_BOOTSTRAP_${name} ${value} CACHE ${type} "") + endif() +endfunction() + +# General Options: +# If you want to override any of the LLVM_RELEASE_* variables you can set them +# on the command line via -D, but you need to do this before you pass this +# cache file to CMake via -C. e.g. +# +# cmake -D LLVM_RELEASE_ENABLE_PGO=ON -C Release.cmake set(LLVM_RELEASE_ENABLE_LTO THIN CACHE STRING "") set(LLVM_RELEASE_ENABLE_PGO OFF CACHE BOOL "") - +set(LLVM_RELEASE_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") +set(LLVM_RELEASE_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "") +# Note we don't need to add install here, since it is one of the pre-defined +# steps.
+set(LLVM_RELEASE_FINAL_STAGE_TARGETS "clang;package;check-all;check-llvm;check-clang" CACHE STRING "") set(CMAKE_BUILD_TYPE RELEASE CACHE STRING "") -# Stage 1 Bootstrap Setup +# Stage 1 Options +set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") + +set(STAGE1_PROJECTS "clang") +set(STAGE1_RUNTIMES "") + if (LLVM_RELEASE_ENABLE_PGO) + list(APPEND STAGE1_PROJECTS "lld") + list(APPEND STAGE1_RUNTIMES "compiler-rt") set(CLANG_BOOTSTRAP_TARGETS generate-profdata - stage2 stage2-package stage2-clang - stage2-distribution stage2-install - stage2-install-distribution - stage2-install-distribution-toolchain stage2-check-all stage2-check-llvm - stage2-check-clang - stage2-test-suite CACHE STRING "") -else() - set(CLANG_BOOTSTRAP_TARGETS - clang - check-all - check-llvm - check-clang - test-suite - stage3 - stage3-clang - stage3-check-all - stage3-check-llvm - stage3-check-clang - stage3-install - stage3-test-suite CACHE STRING "") -endif() + stage2-check-clang CACHE STRING "") -# Stage 1 Options -set(STAGE1_PROJECTS "clang") -set(STAGE1_RUNTIMES "") + # Configuration for stage2-instrumented + set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE STRING "") + # This enables the build targets for the final stage which is called stage2. 
+ set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS ${LLVM_RELEASE_FINAL_STAGE_TARGETS} CACHE STRING "") + set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE STRING "") + set(BOOTSTRAP_LLVM_ENABLE_RUNTIMES "compiler-rt" CACHE STRING "") + set(BOOTSTRAP_LLVM_ENABLE_PROJECTS "clang;lld" CACHE STRING "") -if (LLVM_RELEASE_ENABLE_PGO) - list(APPEND STAGE1_PROJECTS "lld") - list(APPEND STAGE1_RUNTIMES "compiler-rt") +else() + if (LLVM_RELEASE_ENABLE_LTO) + list(APPEND STAGE1_PROJECTS "lld") + endif() + # Any targets added here will be given the target name stage2-${target}, so + # if you want to run them you can just use: + # ninja -C $BUILDDIR stage2-${target} + set(CLANG_BOOTSTRAP_TARGETS ${LLVM_RELEASE_FINAL_STAGE_TARGETS} CACHE STRING "") endif() +# Stage 1 Common Config set(LLVM_ENABLE_RUNTIMES ${STAGE1_RUNTIMES} CACHE STRING "") set(LLVM_ENABLE_PROJECTS ${STAGE1_PROJECTS} CACHE STRING "") -set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") - -# Stage 2 Bootstrap Setup -set(BOOTSTRAP_CLANG_ENABLE_BOOTSTRAP ON CACHE STRING "") -set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS - clang - package - check-all - check-llvm - check-clang CACHE STRING "") - -# Stage 2 Options -set(STAGE2_PROJECTS "clang") -set(STAGE2_RUNTIMES "") - -if (LLVM_RELEASE_ENABLE_LTO OR LLVM_RELEASE_ENABLE_PGO) - list(APPEND STAGE2_PROJECTS "lld") -endif() - -if (LLVM_RELEASE_ENABLE_PGO) - set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE STRING "") - list(APPEND STAGE2_RUNTIMES "compiler-rt") - set(BOOTSTRAP_LLVM_ENABLE_LTO ${LLVM_RELEASE_ENABLE_LTO}) - if (LLVM_RELEASE_ENABLE_LTO) - set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "") - endif() +# stage2-instrumented and Final Stage Config: +# Options that need to be set in both the instrumented stage (if we are doing +# a pgo build) and the final stage. 
+set_instrument_and_final_stage_var(LLVM_ENABLE_LTO "${LLVM_RELEASE_ENABLE_LTO}" STRING) +if (LLVM_RELEASE_ENABLE_LTO) + set_instrument_and_final_stage_var(LLVM_ENABLE_LLD "ON" BOOL) endif() -set(BOOTSTRAP_LLVM_ENABLE_PROJECTS ${STAGE2_PROJECTS} CACHE STRING "") -set(BOOTSTRAP_LLVM_ENABLE_RUNTIMES ${STAGE2_RUNTIMES} CACHE STRING "") -if (NOT LLVM_RELEASE_ENABLE_PGO) - set(BOOTSTRAP_LLVM_TARGETS_TO_BUILD Native CACHE STRING "") -endif() +# Final Stage Config (stage2) +set_final_stage_var(LLVM_ENABLE_RUNTIMES "${LLVM_RELEASE_ENABLE_RUNTIMES}" STRING) +set_final_stage_var(LLVM_ENABLE_PROJECTS "${LLVM_RELEASE_ENABLE_PROJECTS}" STRING) -# Stage 3 Options -set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "") -set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_PROJECTS "clang;lld;lldb;clang-tools-extra;bolt;polly;mlir;flang" CACHE STRING "") -set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LTO ${LLVM_RELEASE_ENABLE_LTO} CACHE STRING "") -if (LLVM_RELEASE_ENABLE_LTO) - set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "") -endif() diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c951282e76c3b2..1e091f39564a58 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -212,6 +212,16 @@ Non-comprehensive list of changes in this release - ``__typeof_unqual__`` is available in all C modes as an extension, which behaves like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``. + +* Shared libraries linked with either the ``-ffast-math``, ``-Ofast``, or + ``-funsafe-math-optimizations`` flags will no longer enable flush-to-zero + floating-point mode by default. This decision can be overridden with use of + ``-mdaz-ftz``. This behavior now matches GCC's behavior. + (`#57589 `_) + +* ``-fdenormal-fp-math=preserve-sign`` is no longer implied by ``-ffast-math`` + on x86 systems. 
+ New Compiler Flags ------------------ - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 8df40566fcba3d..d0326f01d251e0 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1506,7 +1506,8 @@ floating point semantic models: precise (the default), strict, and fast. * ``-ffp-contract=fast`` - Note: ``-ffast-math`` causes ``crtfastmath.o`` to be linked with code. See + Note: ``-ffast-math`` causes ``crtfastmath.o`` to be linked with code unless + ``-shared`` or ``-mno-daz-ftz`` is present. See :ref:`crtfastmath.o` for more details. .. option:: -fno-fast-math @@ -1560,7 +1561,8 @@ floating point semantic models: precise (the default), strict, and fast. ``-ffp-contract``. Note: ``-fno-fast-math`` implies ``-fdenormal-fp-math=ieee``. - ``-fno-fast-math`` causes ``crtfastmath.o`` to not be linked with code. + ``-fno-fast-math`` causes ``crtfastmath.o`` to not be linked with code + unless ``-mdaz-ftz`` is present. .. option:: -fdenormal-fp-math= @@ -1938,10 +1940,13 @@ by using ``#pragma STDC FENV_ROUND`` with a value other than ``FE_DYNAMIC``. A note about ``crtfastmath.o`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``-ffast-math`` and ``-funsafe-math-optimizations`` cause ``crtfastmath.o`` to be -automatically linked, which adds a static constructor that sets the FTZ/DAZ +``-ffast-math`` and ``-funsafe-math-optimizations`` without the ``-shared`` +option cause ``crtfastmath.o`` to be +automatically linked, which adds a static constructor that sets the FTZ/DAZ bits in MXCSR, affecting not only the current compilation unit but all static -and shared libraries included in the program. +and shared libraries included in the program. This decision can be overridden +by using either the flag ``-mdaz-ftz`` or ``-mno-daz-ftz`` to respectively +link or not link ``crtfastmath.o``. .. 
_FLT_EVAL_METHOD: diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 7a8bd985a91fc0..365b607c741179 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -1644,8 +1644,9 @@ enum CXCursorKind { CXCursor_ObjCSelfExpr = 146, /** OpenMP 5.0 [2.1.5, Array Section]. + * OpenACC 3.3 [2.7.1, Data Specification for Data Clauses (Sub Arrays)] */ - CXCursor_OMPArraySectionExpr = 147, + CXCursor_ArraySectionExpr = 147, /** Represents an @available(...) check. */ diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 24388ad5dea5e6..a662d94994ecdb 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -1127,7 +1127,8 @@ class ASTContext : public RefCountedBase { CanQualType OCLSamplerTy, OCLEventTy, OCLClkEventTy; CanQualType OCLQueueTy, OCLReserveIDTy; CanQualType IncompleteMatrixIdxTy; - CanQualType OMPArraySectionTy, OMPArrayShapingTy, OMPIteratorTy; + CanQualType ArraySectionTy; + CanQualType OMPArrayShapingTy, OMPIteratorTy; #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ CanQualType Id##Ty; #include "clang/Basic/OpenCLExtensionTypes.def" diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def index c04f6f6f127191..0a36fdc5d9c0f7 100644 --- a/clang/include/clang/AST/BuiltinTypes.def +++ b/clang/include/clang/AST/BuiltinTypes.def @@ -320,7 +320,7 @@ PLACEHOLDER_TYPE(ARCUnbridgedCast, ARCUnbridgedCastTy) PLACEHOLDER_TYPE(IncompleteMatrixIdx, IncompleteMatrixIdxTy) // A placeholder type for OpenMP array sections. -PLACEHOLDER_TYPE(OMPArraySection, OMPArraySectionTy) +PLACEHOLDER_TYPE(ArraySection, ArraySectionTy) // A placeholder type for OpenMP array shaping operation. 
PLACEHOLDER_TYPE(OMPArrayShaping, OMPArrayShapingTy) diff --git a/clang/include/clang/AST/ComputeDependence.h b/clang/include/clang/AST/ComputeDependence.h index 7abf9141237dc8..6d3a51c379f9df 100644 --- a/clang/include/clang/AST/ComputeDependence.h +++ b/clang/include/clang/AST/ComputeDependence.h @@ -94,7 +94,7 @@ class DesignatedInitExpr; class ParenListExpr; class PseudoObjectExpr; class AtomicExpr; -class OMPArraySectionExpr; +class ArraySectionExpr; class OMPArrayShapingExpr; class OMPIteratorExpr; class ObjCArrayLiteral; @@ -189,7 +189,7 @@ ExprDependence computeDependence(ParenListExpr *E); ExprDependence computeDependence(PseudoObjectExpr *E); ExprDependence computeDependence(AtomicExpr *E); -ExprDependence computeDependence(OMPArraySectionExpr *E); +ExprDependence computeDependence(ArraySectionExpr *E); ExprDependence computeDependence(OMPArrayShapingExpr *E); ExprDependence computeDependence(OMPIteratorExpr *E); diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 2bfefeabc348be..f2bf667636dc9b 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -6610,6 +6610,275 @@ class TypoExpr : public Expr { }; +/// This class represents BOTH the OpenMP Array Section and OpenACC 'subarray', +/// with a boolean differentiator. +/// OpenMP 5.0 [2.1.5, Array Sections]. +/// To specify an array section in an OpenMP construct, array subscript +/// expressions are extended with the following syntax: +/// \code +/// [ lower-bound : length : stride ] +/// [ lower-bound : length : ] +/// [ lower-bound : length ] +/// [ lower-bound : : stride ] +/// [ lower-bound : : ] +/// [ lower-bound : ] +/// [ : length : stride ] +/// [ : length : ] +/// [ : length ] +/// [ : : stride ] +/// [ : : ] +/// [ : ] +/// \endcode +/// The array section must be a subset of the original array. +/// Array sections are allowed on multidimensional arrays. 
Base language array +/// subscript expressions can be used to specify length-one dimensions of +/// multidimensional array sections. +/// Each of the lower-bound, length, and stride expressions if specified must be +/// an integral type expressions of the base language. When evaluated +/// they represent a set of integer values as follows: +/// \code +/// { lower-bound, lower-bound + stride, lower-bound + 2 * stride,... , +/// lower-bound + ((length - 1) * stride) } +/// \endcode +/// The lower-bound and length must evaluate to non-negative integers. +/// The stride must evaluate to a positive integer. +/// When the size of the array dimension is not known, the length must be +/// specified explicitly. +/// When the stride is absent it defaults to 1. +/// When the length is absent it defaults to ⌈(size − lower-bound)/stride⌉, +/// where size is the size of the array dimension. When the lower-bound is +/// absent it defaults to 0. +/// +/// +/// OpenACC 3.3 [2.7.1 Data Specification in Data Clauses] +/// In C and C++, a subarray is an array name followed by an extended array +/// range specification in brackets, with start and length, such as +/// +/// AA[2:n] +/// +/// If the lower bound is missing, zero is used. If the length is missing and +/// the array has known size, the size of the array is used; otherwise the +/// length is required. The subarray AA[2:n] means elements AA[2], AA[3], . . . +/// , AA[2+n-1]. In C and C++, a two dimensional array may be declared in at +/// least four ways: +/// +/// -Statically-sized array: float AA[100][200]; +/// -Pointer to statically sized rows: typedef float row[200]; row* BB; +/// -Statically-sized array of pointers: float* CC[200]; +/// -Pointer to pointers: float** DD; +/// +/// Each dimension may be statically sized, or a pointer to dynamically +/// allocated memory. 
Each of these may be included in a data clause using +/// subarray notation to specify a rectangular array: +/// +/// -AA[2:n][0:200] +/// -BB[2:n][0:m] +/// -CC[2:n][0:m] +/// -DD[2:n][0:m] +/// +/// Multidimensional rectangular subarrays in C and C++ may be specified for any +/// array with any combination of statically-sized or dynamically-allocated +/// dimensions. For statically sized dimensions, all dimensions except the first +/// must specify the whole extent to preserve the contiguous data restriction, +/// discussed below. For dynamically allocated dimensions, the implementation +/// will allocate pointers in device memory corresponding to the pointers in +/// local memory and will fill in those pointers as appropriate. +/// +/// In Fortran, a subarray is an array name followed by a comma-separated list +/// of range specifications in parentheses, with lower and upper bound +/// subscripts, such as +/// +/// arr(1:high,low:100) +/// +/// If either the lower or upper bounds are missing, the declared or allocated +/// bounds of the array, if known, are used. All dimensions except the last must +/// specify the whole extent, to preserve the contiguous data restriction, +/// discussed below. +/// +/// Restrictions +/// +/// -In Fortran, the upper bound for the last dimension of an assumed-size dummy +/// array must be specified. +/// +/// -In C and C++, the length for dynamically allocated dimensions of an array +/// must be explicitly specified. +/// +/// -In C and C++, modifying pointers in pointer arrays during the data +/// lifetime, either on the host or on the device, may result in undefined +/// behavior. +/// +/// -If a subarray appears in a data clause, the implementation may choose to +/// allocate memory for only that subarray on the accelerator. +/// +/// -In Fortran, array pointers may appear, but pointer association is not +/// preserved in device memory. 
+/// +/// -Any array or subarray in a data clause, including Fortran array pointers, +/// must be a contiguous section of memory, except for dynamic multidimensional +/// C arrays. +/// +/// -In C and C++, if a variable or array of composite type appears, all the +/// data members of the struct or class are allocated and copied, as +/// appropriate. If a composite member is a pointer type, the data addressed by +/// that pointer are not implicitly copied. +/// +/// -In Fortran, if a variable or array of composite type appears, all the +/// members of that derived type are allocated and copied, as appropriate. If +/// any member has the allocatable or pointer attribute, the data accessed +/// through that member are not copied. +/// +/// -If an expression is used in a subscript or subarray expression in a clause +/// on a data construct, the same value is used when copying data at the end of +/// the data region, even if the values of variables in the expression change +/// during the data region. +class ArraySectionExpr : public Expr { + friend class ASTStmtReader; + friend class ASTStmtWriter; + +public: + enum ArraySectionType { OMPArraySection, OpenACCArraySection }; + +private: + enum { + BASE, + LOWER_BOUND, + LENGTH, + STRIDE, + END_EXPR, + OPENACC_END_EXPR = STRIDE + }; + + ArraySectionType ASType = OMPArraySection; + Stmt *SubExprs[END_EXPR] = {nullptr}; + SourceLocation ColonLocFirst; + SourceLocation ColonLocSecond; + SourceLocation RBracketLoc; + +public: + // Constructor for OMP array sections, which include a 'stride'. 
+ ArraySectionExpr(Expr *Base, Expr *LowerBound, Expr *Length, Expr *Stride, + QualType Type, ExprValueKind VK, ExprObjectKind OK, + SourceLocation ColonLocFirst, SourceLocation ColonLocSecond, + SourceLocation RBracketLoc) + : Expr(ArraySectionExprClass, Type, VK, OK), ASType(OMPArraySection), + ColonLocFirst(ColonLocFirst), ColonLocSecond(ColonLocSecond), + RBracketLoc(RBracketLoc) { + setBase(Base); + setLowerBound(LowerBound); + setLength(Length); + setStride(Stride); + setDependence(computeDependence(this)); + } + + // Constructor for OpenACC sub-arrays, which do not permit a 'stride'. + ArraySectionExpr(Expr *Base, Expr *LowerBound, Expr *Length, QualType Type, + ExprValueKind VK, ExprObjectKind OK, SourceLocation ColonLoc, + SourceLocation RBracketLoc) + : Expr(ArraySectionExprClass, Type, VK, OK), ASType(OpenACCArraySection), + ColonLocFirst(ColonLoc), RBracketLoc(RBracketLoc) { + setBase(Base); + setLowerBound(LowerBound); + setLength(Length); + setDependence(computeDependence(this)); + } + + /// Create an empty array section expression. + explicit ArraySectionExpr(EmptyShell Shell) + : Expr(ArraySectionExprClass, Shell) {} + + /// Return original type of the base expression for array section. + static QualType getBaseOriginalType(const Expr *Base); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == ArraySectionExprClass; + } + + bool isOMPArraySection() const { return ASType == OMPArraySection; } + bool isOpenACCArraySection() const { return ASType == OpenACCArraySection; } + + /// Get base of the array section. + Expr *getBase() { return cast(SubExprs[BASE]); } + const Expr *getBase() const { return cast(SubExprs[BASE]); } + + /// Get lower bound of array section. + Expr *getLowerBound() { return cast_or_null(SubExprs[LOWER_BOUND]); } + const Expr *getLowerBound() const { + return cast_or_null(SubExprs[LOWER_BOUND]); + } + + /// Get length of array section. 
+ Expr *getLength() { return cast_or_null(SubExprs[LENGTH]); } + const Expr *getLength() const { return cast_or_null(SubExprs[LENGTH]); } + + /// Get stride of array section. + Expr *getStride() { + assert(ASType != OpenACCArraySection && + "Stride not valid in OpenACC subarrays"); + return cast_or_null(SubExprs[STRIDE]); + } + + const Expr *getStride() const { + assert(ASType != OpenACCArraySection && + "Stride not valid in OpenACC subarrays"); + return cast_or_null(SubExprs[STRIDE]); + } + + SourceLocation getBeginLoc() const LLVM_READONLY { + return getBase()->getBeginLoc(); + } + SourceLocation getEndLoc() const LLVM_READONLY { return RBracketLoc; } + + SourceLocation getColonLocFirst() const { return ColonLocFirst; } + SourceLocation getColonLocSecond() const { + assert(ASType != OpenACCArraySection && + "second colon for stride not valid in OpenACC subarrays"); + return ColonLocSecond; + } + SourceLocation getRBracketLoc() const { return RBracketLoc; } + + SourceLocation getExprLoc() const LLVM_READONLY { + return getBase()->getExprLoc(); + } + + child_range children() { + return child_range( + &SubExprs[BASE], + &SubExprs[ASType == OMPArraySection ? END_EXPR : OPENACC_END_EXPR]); + } + + const_child_range children() const { + return const_child_range( + &SubExprs[BASE], + &SubExprs[ASType == OMPArraySection ? END_EXPR : OPENACC_END_EXPR]); + } + +private: + /// Set base of the array section. + void setBase(Expr *E) { SubExprs[BASE] = E; } + + /// Set lower bound of the array section. + void setLowerBound(Expr *E) { SubExprs[LOWER_BOUND] = E; } + + /// Set length of the array section. + void setLength(Expr *E) { SubExprs[LENGTH] = E; } + + /// Set length of the array section. 
+ void setStride(Expr *E) { + assert(ASType != OpenACCArraySection && + "Stride not valid in OpenACC subarrays"); + SubExprs[STRIDE] = E; + } + + void setColonLocFirst(SourceLocation L) { ColonLocFirst = L; } + + void setColonLocSecond(SourceLocation L) { + assert(ASType != OpenACCArraySection && + "second colon for stride not valid in OpenACC subarrays"); + ColonLocSecond = L; + } + void setRBracketLoc(SourceLocation L) { RBracketLoc = L; } +}; + /// Frontend produces RecoveryExprs on semantic errors that prevent creating /// other well-formed expressions. E.g. when type-checking of a binary operator /// fails, we cannot produce a BinaryOperator expression. Instead, we can choose diff --git a/clang/include/clang/AST/ExprOpenMP.h b/clang/include/clang/AST/ExprOpenMP.h index be5b1f3fdd112f..54a0c203f656c3 100644 --- a/clang/include/clang/AST/ExprOpenMP.h +++ b/clang/include/clang/AST/ExprOpenMP.h @@ -17,130 +17,6 @@ #include "clang/AST/Expr.h" namespace clang { -/// OpenMP 5.0 [2.1.5, Array Sections]. -/// To specify an array section in an OpenMP construct, array subscript -/// expressions are extended with the following syntax: -/// \code -/// [ lower-bound : length : stride ] -/// [ lower-bound : length : ] -/// [ lower-bound : length ] -/// [ lower-bound : : stride ] -/// [ lower-bound : : ] -/// [ lower-bound : ] -/// [ : length : stride ] -/// [ : length : ] -/// [ : length ] -/// [ : : stride ] -/// [ : : ] -/// [ : ] -/// \endcode -/// The array section must be a subset of the original array. -/// Array sections are allowed on multidimensional arrays. Base language array -/// subscript expressions can be used to specify length-one dimensions of -/// multidimensional array sections. -/// Each of the lower-bound, length, and stride expressions if specified must be -/// an integral type expressions of the base language. 
When evaluated -/// they represent a set of integer values as follows: -/// \code -/// { lower-bound, lower-bound + stride, lower-bound + 2 * stride,... , -/// lower-bound + ((length - 1) * stride) } -/// \endcode -/// The lower-bound and length must evaluate to non-negative integers. -/// The stride must evaluate to a positive integer. -/// When the size of the array dimension is not known, the length must be -/// specified explicitly. -/// When the stride is absent it defaults to 1. -/// When the length is absent it defaults to ⌈(size − lower-bound)/stride⌉, -/// where size is the size of the array dimension. When the lower-bound is -/// absent it defaults to 0. -class OMPArraySectionExpr : public Expr { - enum { BASE, LOWER_BOUND, LENGTH, STRIDE, END_EXPR }; - Stmt *SubExprs[END_EXPR]; - SourceLocation ColonLocFirst; - SourceLocation ColonLocSecond; - SourceLocation RBracketLoc; - -public: - OMPArraySectionExpr(Expr *Base, Expr *LowerBound, Expr *Length, Expr *Stride, - QualType Type, ExprValueKind VK, ExprObjectKind OK, - SourceLocation ColonLocFirst, - SourceLocation ColonLocSecond, SourceLocation RBracketLoc) - : Expr(OMPArraySectionExprClass, Type, VK, OK), - ColonLocFirst(ColonLocFirst), ColonLocSecond(ColonLocSecond), - RBracketLoc(RBracketLoc) { - SubExprs[BASE] = Base; - SubExprs[LOWER_BOUND] = LowerBound; - SubExprs[LENGTH] = Length; - SubExprs[STRIDE] = Stride; - setDependence(computeDependence(this)); - } - - /// Create an empty array section expression. - explicit OMPArraySectionExpr(EmptyShell Shell) - : Expr(OMPArraySectionExprClass, Shell) {} - - /// An array section can be written only as Base[LowerBound:Length]. - - /// Get base of the array section. - Expr *getBase() { return cast(SubExprs[BASE]); } - const Expr *getBase() const { return cast(SubExprs[BASE]); } - /// Set base of the array section. - void setBase(Expr *E) { SubExprs[BASE] = E; } - - /// Return original type of the base expression for array section. 
- static QualType getBaseOriginalType(const Expr *Base); - - /// Get lower bound of array section. - Expr *getLowerBound() { return cast_or_null(SubExprs[LOWER_BOUND]); } - const Expr *getLowerBound() const { - return cast_or_null(SubExprs[LOWER_BOUND]); - } - /// Set lower bound of the array section. - void setLowerBound(Expr *E) { SubExprs[LOWER_BOUND] = E; } - - /// Get length of array section. - Expr *getLength() { return cast_or_null(SubExprs[LENGTH]); } - const Expr *getLength() const { return cast_or_null(SubExprs[LENGTH]); } - /// Set length of the array section. - void setLength(Expr *E) { SubExprs[LENGTH] = E; } - - /// Get stride of array section. - Expr *getStride() { return cast_or_null(SubExprs[STRIDE]); } - const Expr *getStride() const { return cast_or_null(SubExprs[STRIDE]); } - /// Set length of the array section. - void setStride(Expr *E) { SubExprs[STRIDE] = E; } - - SourceLocation getBeginLoc() const LLVM_READONLY { - return getBase()->getBeginLoc(); - } - SourceLocation getEndLoc() const LLVM_READONLY { return RBracketLoc; } - - SourceLocation getColonLocFirst() const { return ColonLocFirst; } - void setColonLocFirst(SourceLocation L) { ColonLocFirst = L; } - - SourceLocation getColonLocSecond() const { return ColonLocSecond; } - void setColonLocSecond(SourceLocation L) { ColonLocSecond = L; } - - SourceLocation getRBracketLoc() const { return RBracketLoc; } - void setRBracketLoc(SourceLocation L) { RBracketLoc = L; } - - SourceLocation getExprLoc() const LLVM_READONLY { - return getBase()->getExprLoc(); - } - - static bool classof(const Stmt *T) { - return T->getStmtClass() == OMPArraySectionExprClass; - } - - child_range children() { - return child_range(&SubExprs[BASE], &SubExprs[END_EXPR]); - } - - const_child_range children() const { - return const_child_range(&SubExprs[BASE], &SubExprs[END_EXPR]); - } -}; - /// An explicit cast in C or a C-style cast in C++, which uses the syntax /// ([s1][s2]...[sn])expr. For example: @c ([3][3])f. 
class OMPArrayShapingExpr final diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 7eb92e304a3856..f9b145b4e86a55 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2740,7 +2740,7 @@ DEF_TRAVERSE_STMT(CXXMemberCallExpr, {}) DEF_TRAVERSE_STMT(AddrLabelExpr, {}) DEF_TRAVERSE_STMT(ArraySubscriptExpr, {}) DEF_TRAVERSE_STMT(MatrixSubscriptExpr, {}) -DEF_TRAVERSE_STMT(OMPArraySectionExpr, {}) +DEF_TRAVERSE_STMT(ArraySectionExpr, {}) DEF_TRAVERSE_STMT(OMPArrayShapingExpr, {}) DEF_TRAVERSE_STMT(OMPIteratorExpr, {}) diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 14b08d4927ec5e..fcffadacc8e631 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -370,4 +370,7 @@ def warn_missing_symbol_graph_dir : Warning< "Missing symbol graph output directory, defaulting to working directory">, InGroup; +def err_ast_action_on_llvm_ir : Error< + "cannot apply AST actions to LLVM IR file '%0'">, + DefaultFatal; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e9c941c1b068b7..e580091f879dde 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11159,7 +11159,7 @@ def err_omp_declare_mapper_redefinition : Error< "redefinition of user-defined mapper for type %0 with name %1">; def err_omp_invalid_mapper: Error< "cannot find a valid user-defined mapper for type %0 with name %1">; -def err_omp_array_section_use : Error<"OpenMP array section is not allowed here">; +def err_array_section_use : Error<"%select{OpenACC sub-array|OpenMP array section}0 is not allowed here">; def err_omp_array_shaping_use : Error<"OpenMP array shaping operation is not allowed here">; def err_omp_iterator_use 
: Error<"OpenMP iterator is not allowed here">; def err_omp_typecheck_section_value : Error< diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 2245fd78bfc9f0..8b4206e52cd482 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -114,6 +114,12 @@ class FileManager : public RefCountedBase { /// unsigned NextFileUID; + /// Statistics gathered during the lifetime of the FileManager. + unsigned NumDirLookups = 0; + unsigned NumFileLookups = 0; + unsigned NumDirCacheMisses = 0; + unsigned NumFileCacheMisses = 0; + // Caching. std::unique_ptr StatCache; @@ -341,6 +347,10 @@ class FileManager : public RefCountedBase { public: void PrintStats() const; + + /// Import statistics from a child FileManager and add them to this current + /// FileManager. + void AddStats(const FileManager &Other); }; } // end namespace clang diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index b4e3ae573b95e6..305f19daa4a923 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -71,7 +71,7 @@ def OffsetOfExpr : StmtNode; def UnaryExprOrTypeTraitExpr : StmtNode; def ArraySubscriptExpr : StmtNode; def MatrixSubscriptExpr : StmtNode; -def OMPArraySectionExpr : StmtNode; +def ArraySectionExpr : StmtNode; def OMPIteratorExpr : StmtNode; def CallExpr : StmtNode; def MemberExpr : StmtNode; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 41a85e8f20defb..fc0b2ac3b5900b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2617,6 +2617,11 @@ defm protect_parens : BoolFOption<"protect-parens", "floating-point expressions are evaluated">, NegFlag>; +defm daz_ftz : SimpleMFlag<"daz-ftz", + "Globally set", "Do not globally set", + " the denormals-are-zero (DAZ) and flush-to-zero (FTZ) bits in the " + "floating-point control register 
on program startup">; + def ffor_scope : Flag<["-"], "ffor-scope">, Group; def fno_for_scope : Flag<["-"], "fno-for-scope">, Group; diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index ea28617f79b81b..da19503c2902fd 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -193,6 +193,12 @@ class SemaOpenACC : public SemaBase { /// conversions and diagnostics to 'int'. ExprResult ActOnIntExpr(OpenACCDirectiveKind DK, OpenACCClauseKind CK, SourceLocation Loc, Expr *IntExpr); + + /// Checks and creates an Array Section used in an OpenACC construct/clause. + ExprResult ActOnArraySectionExpr(Expr *Base, SourceLocation LBLoc, + Expr *LowerBound, + SourceLocation ColonLocFirst, Expr *Length, + SourceLocation RBLoc); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 186c3b722ced16..a8df5a0bda0850 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -973,8 +973,8 @@ enum PredefinedTypeIDs { /// OpenCL reserve_id type. PREDEF_TYPE_RESERVE_ID_ID = 41, - /// The placeholder type for OpenMP array section. - PREDEF_TYPE_OMP_ARRAY_SECTION = 42, + /// The placeholder type for an array section. 
+ PREDEF_TYPE_ARRAY_SECTION = 42, /// The '__float128' type PREDEF_TYPE_FLOAT128_ID = 43, @@ -1926,7 +1926,7 @@ enum StmtCode { STMT_OMP_TARGET_TEAMS_GENERIC_LOOP_DIRECTIVE, STMT_OMP_PARALLEL_GENERIC_LOOP_DIRECTIVE, STMT_OMP_TARGET_PARALLEL_GENERIC_LOOP_DIRECTIVE, - EXPR_OMP_ARRAY_SECTION, + EXPR_ARRAY_SECTION, EXPR_OMP_ARRAY_SHAPING, EXPR_OMP_ITERATOR, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 0a42eb53775b75..486d6ef620ad96 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -1321,16 +1321,14 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, // Placeholder type for OMP array sections. if (LangOpts.OpenMP) { - InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection); + InitBuiltinType(ArraySectionTy, BuiltinType::ArraySection); InitBuiltinType(OMPArrayShapingTy, BuiltinType::OMPArrayShaping); InitBuiltinType(OMPIteratorTy, BuiltinType::OMPIterator); } - // Placeholder type for OpenACC array sections. - if (LangOpts.OpenACC) { - // FIXME: Once we implement OpenACC array sections in Sema, this will either - // be combined with the OpenMP type, or given its own type. In the meantime, - // just use the OpenMP type so that parsing can work. - InitBuiltinType(OMPArraySectionTy, BuiltinType::OMPArraySection); + // Placeholder type for OpenACC array sections, if we are ALSO in OMP mode, + // don't bother, as we're just using the same type as OMP. 
+ if (LangOpts.OpenACC && !LangOpts.OpenMP) { + InitBuiltinType(ArraySectionTy, BuiltinType::ArraySection); } if (LangOpts.MatrixTypes) InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx); diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index 5ec3013fabba9a..bad8e75b2f878c 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -443,12 +443,17 @@ ExprDependence clang::computeDependence(ObjCIndirectCopyRestoreExpr *E) { return E->getSubExpr()->getDependence(); } -ExprDependence clang::computeDependence(OMPArraySectionExpr *E) { +ExprDependence clang::computeDependence(ArraySectionExpr *E) { auto D = E->getBase()->getDependence(); if (auto *LB = E->getLowerBound()) D |= LB->getDependence(); if (auto *Len = E->getLength()) D |= Len->getDependence(); + + if (E->isOMPArraySection()) { + if (auto *Stride = E->getStride()) + D |= Stride->getDependence(); + } return D; } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 9eec7edc9d1a3e..63dcdb919c7117 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -3680,7 +3680,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case ParenExprClass: case ArraySubscriptExprClass: case MatrixSubscriptExprClass: - case OMPArraySectionExprClass: + case ArraySectionExprClass: case OMPArrayShapingExprClass: case OMPIteratorExprClass: case MemberExprClass: @@ -5060,9 +5060,9 @@ QualType AtomicExpr::getValueType() const { return T; } -QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) { +QualType ArraySectionExpr::getBaseOriginalType(const Expr *Base) { unsigned ArraySectionCount = 0; - while (auto *OASE = dyn_cast(Base->IgnoreParens())) { + while (auto *OASE = dyn_cast(Base->IgnoreParens())) { Base = OASE->getBase(); ++ArraySectionCount; } diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index 7026fca8554ce9..2bb8f9aeedc7e2 100644 --- 
a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -145,7 +145,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::FunctionParmPackExprClass: case Expr::MSPropertyRefExprClass: case Expr::MSPropertySubscriptExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: return Cl::CL_LValue; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index de3c2a63913e94..ea3e7304a7423c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16130,7 +16130,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { case Expr::StringLiteralClass: case Expr::ArraySubscriptExprClass: case Expr::MatrixSubscriptExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::MemberExprClass: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 106c69dd5beed7..ed9e6eeb36c75d 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -4715,7 +4715,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, case Expr::MSPropertySubscriptExprClass: case Expr::TypoExprClass: // This should no longer exist in the AST by now. 
case Expr::RecoveryExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::CXXInheritedCtorInitExprClass: diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp index ecc56c13fb7573..6f586173edb021 100644 --- a/clang/lib/AST/NSAPI.cpp +++ b/clang/lib/AST/NSAPI.cpp @@ -462,7 +462,7 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const { case BuiltinType::PseudoObject: case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: case BuiltinType::BFloat16: diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 5855ab3141edcc..f010d36513a49e 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1521,7 +1521,7 @@ void StmtPrinter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *Node) { OS << "]"; } -void StmtPrinter::VisitOMPArraySectionExpr(OMPArraySectionExpr *Node) { +void StmtPrinter::VisitArraySectionExpr(ArraySectionExpr *Node) { PrintExpr(Node->getBase()); OS << "["; if (Node->getLowerBound()) @@ -1531,7 +1531,7 @@ void StmtPrinter::VisitOMPArraySectionExpr(OMPArraySectionExpr *Node) { if (Node->getLength()) PrintExpr(Node->getLength()); } - if (Node->getColonLocSecond().isValid()) { + if (Node->isOMPArraySection() && Node->getColonLocSecond().isValid()) { OS << ":"; if (Node->getStride()) PrintExpr(Node->getStride()); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c81724f84dd9ce..a95f5c6103e24d 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1435,7 +1435,7 @@ void StmtProfiler::VisitMatrixSubscriptExpr(const MatrixSubscriptExpr *S) { VisitExpr(S); } -void StmtProfiler::VisitOMPArraySectionExpr(const OMPArraySectionExpr *S) { +void StmtProfiler::VisitArraySectionExpr(const 
ArraySectionExpr *S) { VisitExpr(S); } diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index cb22c91a12aa89..8aaa6801d85b8b 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3413,8 +3413,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { return "reserve_id_t"; case IncompleteMatrixIdx: return ""; - case OMPArraySection: - return ""; + case ArraySection: + return ""; case OMPArrayShaping: return ""; case OMPIterator: @@ -4710,7 +4710,7 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case BuiltinType::BuiltinFn: case BuiltinType::NullPtr: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return false; diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp index 21e152f6aea8a0..ce45b47d5cfea5 100644 --- a/clang/lib/AST/TypeLoc.cpp +++ b/clang/lib/AST/TypeLoc.cpp @@ -429,7 +429,7 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const { #include "clang/Basic/WebAssemblyReferenceTypes.def" case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return TST_unspecified; diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index cd520a6375e07e..143c04309d0753 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -39,12 +39,6 @@ using namespace clang; #define DEBUG_TYPE "file-search" -ALWAYS_ENABLED_STATISTIC(NumDirLookups, "Number of directory lookups."); -ALWAYS_ENABLED_STATISTIC(NumFileLookups, "Number of file lookups."); -ALWAYS_ENABLED_STATISTIC(NumDirCacheMisses, - "Number of directory cache misses."); -ALWAYS_ENABLED_STATISTIC(NumFileCacheMisses, "Number of file cache misses."); - 
//===----------------------------------------------------------------------===// // Common logic. //===----------------------------------------------------------------------===// @@ -656,6 +650,14 @@ StringRef FileManager::getCanonicalName(const void *Entry, StringRef Name) { return CanonicalName; } +void FileManager::AddStats(const FileManager &Other) { + assert(&Other != this && "Collecting stats into the same FileManager"); + NumDirLookups += Other.NumDirLookups; + NumFileLookups += Other.NumFileLookups; + NumDirCacheMisses += Other.NumDirCacheMisses; + NumFileCacheMisses += Other.NumFileCacheMisses; +} + void FileManager::PrintStats() const { llvm::errs() << "\n*** File Manager Stats:\n"; llvm::errs() << UniqueRealFiles.size() << " real files found, " diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index dc6748d587d2ce..cbe7854c94ff51 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3628,7 +3628,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // frexpl instead of legalizing this type in the BE. 
if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; } case Builtin::BI__builtin_frexp: case Builtin::BI__builtin_frexpf: @@ -5366,7 +5366,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ptrauth_auth_and_resign: if (Args[4]->getType()->isPointerTy()) Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy); - LLVM_FALLTHROUGH; + [[fallthrough]]; case Builtin::BI__builtin_ptrauth_auth: case Builtin::BI__builtin_ptrauth_sign_unauthenticated: @@ -18858,7 +18858,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: AppendFalseForOpselArg = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB @@ -18867,7 +18867,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: AppendFalseForOpselArg = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b3e7aff3d96924..2ac5e0a0b95a05 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1586,6 +1586,11 @@ bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) { return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet()); } +bool CodeGenModule::ReturnTypeHasInReg(const CGFunctionInfo &FI) { + const auto &RI = FI.getReturnInfo(); + return RI.getInReg(); +} + bool 
CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) { return ReturnTypeUsesSRet(FI) && getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs(); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 64d868568e7977..799ad7081b6e1d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1621,8 +1621,8 @@ LValue CodeGenFunction::EmitLValueHelper(const Expr *E, return EmitArraySubscriptExpr(cast(E)); case Expr::MatrixSubscriptExprClass: return EmitMatrixSubscriptExpr(cast(E)); - case Expr::OMPArraySectionExprClass: - return EmitOMPArraySectionExpr(cast(E)); + case Expr::ArraySectionExprClass: + return EmitArraySectionExpr(cast(E)); case Expr::ExtVectorElementExprClass: return EmitExtVectorElementExpr(cast(E)); case Expr::CXXThisExprClass: @@ -4363,8 +4363,8 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, QualType BaseTy, QualType ElTy, bool IsLowerBound) { LValue BaseLVal; - if (auto *ASE = dyn_cast(Base->IgnoreParenImpCasts())) { - BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); + if (auto *ASE = dyn_cast(Base->IgnoreParenImpCasts())) { + BaseLVal = CGF.EmitArraySectionExpr(ASE, IsLowerBound); if (BaseTy->isArrayType()) { Address Addr = BaseLVal.getAddress(CGF); BaseInfo = BaseLVal.getBaseInfo(); @@ -4396,9 +4396,13 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } -LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, - bool IsLowerBound) { - QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(E->getBase()); +LValue CodeGenFunction::EmitArraySectionExpr(const ArraySectionExpr *E, + bool IsLowerBound) { + + assert(!E->isOpenACCArraySection() && + "OpenACC Array section codegen not implemented"); + + QualType BaseTy = ArraySectionExpr::getBaseOriginalType(E->getBase()); QualType ResultExprTy; if (auto *AT = 
getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 4e7f777ba1d916..43dd38659518d1 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -2905,23 +2905,29 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, break; case CodeGenOptions::Mixed: case CodeGenOptions::NonLegacy: + StringRef name = "objc_msgSend"; if (CGM.ReturnTypeUsesFPRet(ResultType)) { - imp = - CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_fpret") - .getCallee(); + name = "objc_msgSend_fpret"; } else if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) { - // The actual types here don't matter - we're going to bitcast the - // function anyway - imp = - CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_stret") - .getCallee(); - } else { - imp = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IdTy, IdTy, true), "objc_msgSend") - .getCallee(); + name = "objc_msgSend_stret"; + + // The address of the memory block is be passed in x8 for POD type, + // or in x0 for non-POD type (marked as inreg). 
+ bool shouldCheckForInReg = + CGM.getContext() + .getTargetInfo() + .getTriple() + .isWindowsMSVCEnvironment() && + CGM.getContext().getTargetInfo().getTriple().isAArch64(); + if (shouldCheckForInReg && CGM.ReturnTypeHasInReg(MSI.CallInfo)) { + name = "objc_msgSend_stret2"; + } } + // The actual types here don't matter - we're going to bitcast the + // function anyway + imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), + name) + .getCallee(); } // Reset the receiver in case the lookup modified it diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index bb43af0276d183..19c2ca99181fb7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -763,8 +763,8 @@ LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E) { - if (const auto *OASE = dyn_cast(E)) - return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + if (const auto *OASE = dyn_cast(E)) + return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false); return LValue(); } @@ -821,7 +821,7 @@ void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { QualType PrivateType = getPrivateType(N); - bool AsArraySection = isa(ClausesData[N].Ref); + bool AsArraySection = isa(ClausesData[N].Ref); if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), @@ -962,9 +962,9 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { const VarDecl *OrigVD = nullptr; - if (const auto *OASE = dyn_cast(Ref)) { + if (const auto *OASE = dyn_cast(Ref)) { const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (const auto 
*TempOASE = dyn_cast(Base)) + while (const auto *TempOASE = dyn_cast(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); @@ -3591,9 +3591,8 @@ getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); } } else if (const auto *ASE = - dyn_cast(E->IgnoreParenImpCasts())) { - LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); + dyn_cast(E->IgnoreParenImpCasts())) { + LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false); Address UpAddrAddress = UpAddrLVal.getAddress(CGF); llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF), @@ -6693,8 +6692,8 @@ class MappableExprsHandler { // Given that an array section is considered a built-in type, we need to // do the calculation based on the length of the section instead of relying // on CGF.getTypeSize(E->getType()). - if (const auto *OAE = dyn_cast(E)) { - QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( + if (const auto *OAE = dyn_cast(E)) { + QualType BaseTy = ArraySectionExpr::getBaseOriginalType( OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); @@ -6800,7 +6799,7 @@ class MappableExprsHandler { /// Return true if the provided expression is a final array section. A /// final array section, is one whose length can't be proved to be one. bool isFinalArraySectionExpression(const Expr *E) const { - const auto *OASE = dyn_cast(E); + const auto *OASE = dyn_cast(E); // It is not an array section and therefore not a unity-size one. if (!OASE) @@ -6816,7 +6815,7 @@ class MappableExprsHandler { // for this dimension. Also, we should always expect a length if the // base type is pointer. 
if (!Length) { - QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + QualType BaseQTy = ArraySectionExpr::getBaseOriginalType( OASE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); if (const auto *ATy = dyn_cast(BaseQTy.getTypePtr())) @@ -7048,7 +7047,7 @@ class MappableExprsHandler { Address BP = Address::invalid(); const Expr *AssocExpr = I->getAssociatedExpression(); const auto *AE = dyn_cast(AssocExpr); - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); const auto *OAShE = dyn_cast(AssocExpr); if (isa(AssocExpr)) { @@ -7200,14 +7199,14 @@ class MappableExprsHandler { // special treatment for array sections given that they are built-in // types. const auto *OASE = - dyn_cast(I->getAssociatedExpression()); + dyn_cast(I->getAssociatedExpression()); const auto *OAShE = dyn_cast(I->getAssociatedExpression()); const auto *UO = dyn_cast(I->getAssociatedExpression()); const auto *BO = dyn_cast(I->getAssociatedExpression()); bool IsPointer = OAShE || - (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) + (OASE && ArraySectionExpr::getBaseOriginalType(OASE) .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); @@ -7228,7 +7227,7 @@ class MappableExprsHandler { assert((Next == CE || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || - isa(Next->getAssociatedExpression()) || + isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression()) || isa(Next->getAssociatedExpression())) && @@ -7460,7 +7459,7 @@ class MappableExprsHandler { PartialStruct.LowestElem = {FieldIndex, LowestElem}; if (IsFinalArraySection) { Address HB = - CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) .getAddress(CGF); PartialStruct.HighestElem = {FieldIndex, HB}; } else { @@ -7473,7 +7472,7 @@ class MappableExprsHandler { } else if (FieldIndex > 
PartialStruct.HighestElem.first) { if (IsFinalArraySection) { Address HB = - CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) .getAddress(CGF); PartialStruct.HighestElem = {FieldIndex, HB}; } else { @@ -7531,12 +7530,12 @@ class MappableExprsHandler { for (const OMPClauseMappableExprCommon::MappableComponent &Component : Components) { const Expr *AssocExpr = Component.getAssociatedExpression(); - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); if (!OASE) continue; - QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase()); auto *CAT = Context.getAsConstantArrayType(Ty); auto *VAT = Context.getAsVariableArrayType(Ty); @@ -7610,7 +7609,7 @@ class MappableExprsHandler { continue; } - const auto *OASE = dyn_cast(AssocExpr); + const auto *OASE = dyn_cast(AssocExpr); if (!OASE) continue; @@ -8801,7 +8800,7 @@ static ValueDecl *getDeclFromThisExpr(const Expr *E) { if (!E) return nullptr; - if (const auto *OASE = dyn_cast(E->IgnoreParenCasts())) + if (const auto *OASE = dyn_cast(E->IgnoreParenCasts())) if (const MemberExpr *ME = dyn_cast(OASE->getBase()->IgnoreParenImpCasts())) return ME->getMemberDecl(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index c00f96e22181f0..6407682d050eda 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -97,9 +97,9 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) { while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); RefExpr = Base; - } else if (auto *OASE = dyn_cast(RefExpr)) { + } else if (auto *OASE = dyn_cast(RefExpr)) { const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (const auto *TempOASE = dyn_cast(Base)) + while (const auto *TempOASE = dyn_cast(Base)) Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c3843013b0a68c..c333981daccd53 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1442,7 +1442,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( const auto *LHSVD = cast(cast(*ILHS)->getDecl()); const auto *RHSVD = cast(cast(*IRHS)->getDecl()); QualType Type = PrivateVD->getType(); - bool isaOMPArraySectionExpr = isa(IRef); + bool isaOMPArraySectionExpr = isa(IRef); if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. @@ -7630,7 +7630,7 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( static const VarDecl *getBaseDecl(const Expr *Ref) { const Expr *Base = Ref->IgnoreParenImpCasts(); - while (const auto *OASE = dyn_cast(Base)) + while (const auto *OASE = dyn_cast(Base)) Base = OASE->getBase()->IgnoreParenImpCasts(); while (const auto *ASE = dyn_cast(Base)) Base = ASE->getBase()->IgnoreParenImpCasts(); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 77069d7a5b180b..2c21d080313da3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4228,8 +4228,8 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E, bool Accessed = false); LValue EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E); - LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, - bool IsLowerBound = true); + LValue EmitArraySectionExpr(const ArraySectionExpr *E, + bool IsLowerBound = true); LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E); LValue EmitMemberExpr(const MemberExpr *E); LValue EmitObjCIsaExpr(const ObjCIsaExpr *E); diff --git 
a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index cf0796017d0f5e..7a458ac1382d6e 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1356,6 +1356,9 @@ class CodeGenModule : public CodeGenTypeCache { /// Return true iff the given type uses 'sret' when used as a return type. bool ReturnTypeUsesSRet(const CGFunctionInfo &FI); + /// Return true iff the given type has `inreg` set. + bool ReturnTypeHasInReg(const CGFunctionInfo &FI); + /// Return true iff the given type uses an argument slot when 'sret' is used /// as a return type. bool ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 1568b6e6275b9d..e8d75eda029e66 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -409,7 +409,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; case BuiltinType::LongDouble: LongDoubleReferenced = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case BuiltinType::BFloat16: case BuiltinType::Float: case BuiltinType::Double: diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index c1b9e60201b072..6af814acc4433d 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1323,9 +1323,14 @@ void ToolChain::AddCCKextLibArgs(const ArgList &Args, bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, std::string &Path) const { + // Don't implicitly link in mode-changing libraries in a shared library, since + // this can have very deleterious effects. See the various links from + // https://github.com/llvm/llvm-project/issues/57589 for more information. + bool Default = !Args.hasArgNoClaim(options::OPT_shared); + // Do not check for -fno-fast-math or -fno-unsafe-math when -Ofast passed // (to keep the linker options consistent with gcc and clang itself). 
- if (!isOptimizationLevelFast(Args)) { + if (Default && !isOptimizationLevelFast(Args)) { // Check if -ffast-math or -funsafe-math. Arg *A = Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math, @@ -1334,8 +1339,14 @@ bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, if (!A || A->getOption().getID() == options::OPT_fno_fast_math || A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations) - return false; + Default = false; } + + // Whatever decision came as a result of the above implicit settings, either + // -mdaz-ftz or -mno-daz-ftz is capable of overriding it. + if (!Args.hasFlag(options::OPT_mdaz_ftz, options::OPT_mno_daz_ftz, Default)) + return false; + // If crtfastmath.o exists add it to the arguments. Path = GetFilePath("crtfastmath.o"); return (Path != "crtfastmath.o"); // Not found. diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index c1b350893b3744..aab98506adb96f 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -376,9 +376,7 @@ void AIX::AddOpenMPIncludeArgs(const ArgList &DriverArgs, addSystemInclude(DriverArgs, CC1Args, PathOpenMP.str()); break; case Driver::OMPRT_IOMP5: - LLVM_FALLTHROUGH; case Driver::OMPRT_GOMP: - LLVM_FALLTHROUGH; case Driver::OMPRT_Unknown: // Unknown / unsupported include paths. break; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 79a118030e7c91..849fd1e21ddd64 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -863,46 +863,6 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, } } -/// Check whether the given input tree contains any compilation actions. -static bool ContainsCompileAction(const Action *A) { - if (isa(A) || isa(A)) - return true; - - return llvm::any_of(A->inputs(), ContainsCompileAction); -} - -/// Check if -relax-all should be passed to the internal assembler. 
-/// This is done by default when compiling non-assembler source with -O0. -static bool UseRelaxAll(Compilation &C, const ArgList &Args) { - bool RelaxDefault = true; - - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) - RelaxDefault = A->getOption().matches(options::OPT_O0); - - // RISC-V requires an indirect jump for offsets larger than 1MiB. This cannot - // be done by assembler branch relaxation as it needs a free temporary - // register. Because of this, branch relaxation is handled by a MachineIR - // pass before the assembler. Forcing assembler branch relaxation for -O0 - // makes the MachineIR branch relaxation inaccurate and it will miss cases - // where an indirect branch is necessary. To avoid this issue we are - // sacrificing the compile time improvement of using -mrelax-all for -O0. - if (C.getDefaultToolChain().getTriple().isRISCV()) - RelaxDefault = false; - - if (RelaxDefault) { - RelaxDefault = false; - for (const auto &Act : C.getActions()) { - if (ContainsCompileAction(Act)) { - RelaxDefault = true; - break; - } - } - } - - return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all, - RelaxDefault); -} - static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs, llvm::codegenoptions::DebugInfoKind DebugInfoKind, @@ -2514,8 +2474,16 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, const ArgList &Args, ArgStringList &CmdArgs, const Driver &D) { - if (UseRelaxAll(C, Args)) - CmdArgs.push_back("-mrelax-all"); + // Default to -mno-relax-all. + // + // Note: RISC-V requires an indirect jump for offsets larger than 1MiB. This + // cannot be done by assembler branch relaxation as it needs a free temporary + // register. Because of this, branch relaxation is handled by a MachineIR pass + // before the assembler. Forcing assembler branch relaxation for -O0 makes the + // MachineIR branch relaxation inaccurate and it will miss cases where an + // indirect branch is necessary. 
+ Args.addOptInFlag(CmdArgs, options::OPT_mrelax_all, + options::OPT_mno_relax_all); // Only default to -mincremental-linker-compatible if we think we are // targeting the MSVC linker. diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index f2a181e51bcf26..c32cf27cb1fcd8 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -1076,25 +1076,6 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, ToolChain::addProfileRTLibs(Args, CmdArgs); } -llvm::DenormalMode -Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs, - const JobAction &JA, - const llvm::fltSemantics *FPType) const { - switch (getTriple().getArch()) { - case llvm::Triple::x86: - case llvm::Triple::x86_64: { - std::string Unused; - // DAZ and FTZ are turned on in crtfastmath.o - if (!DriverArgs.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles) && - isFastMathRuntimeAvailable(DriverArgs, Unused)) - return llvm::DenormalMode::getPreserveSign(); - return llvm::DenormalMode::getIEEE(); - } - default: - return llvm::DenormalMode::getIEEE(); - } -} - void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { for (const auto &Opt : ExtraOpts) CmdArgs.push_back(Opt.c_str()); diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h index 6e5f7fb1e918d8..0cf4104814b5fe 100644 --- a/clang/lib/Driver/ToolChains/Linux.h +++ b/clang/lib/Driver/ToolChains/Linux.h @@ -65,10 +65,6 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF { std::vector ExtraOpts; - llvm::DenormalMode getDefaultDenormalModeForType( - const llvm::opt::ArgList &DriverArgs, const JobAction &JA, - const llvm::fltSemantics *FPType = nullptr) const override; - const char *getDefaultLinker() const override; protected: diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index ccb2c9190e2eff..c8d8ec3afbd990 100644 --- a/clang/lib/Format/Format.cpp +++ 
b/clang/lib/Format/Format.cpp @@ -807,7 +807,6 @@ template <> struct MappingTraits { FormatStyle PredefinedStyle; if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && Style == PredefinedStyle) { - IO.mapOptional("# BasedOnStyle", StyleName); BasedOnStyle = StyleName; break; } @@ -3117,6 +3116,7 @@ static void sortCppIncludes(const FormatStyle &Style, return; } + const auto OldCursor = Cursor ? *Cursor : 0; std::string result; for (unsigned Index : Indices) { if (!result.empty()) { @@ -3140,6 +3140,8 @@ static void sortCppIncludes(const FormatStyle &Style, // the entire range of blocks. Otherwise, no replacement is generated. if (replaceCRLF(result) == replaceCRLF(std::string(Code.substr( IncludesBeginOffset, IncludesBlockSize)))) { + if (Cursor) + *Cursor = OldCursor; return; } diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 6e3baf83864415..66a45b888f15cc 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1293,6 +1293,10 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, diag::remark_module_build_done) << ModuleName; + // Propagate the statistics to the parent FileManager. + if (!FrontendOpts.ModulesShareFileManager) + ImportingInstance.getFileManager().AddStats(Instance.getFileManager()); + if (Crashed) { // Clear the ASTConsumer if it hasn't been already, in case it owns streams // that must be closed before clearing output files. diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index a2af738a053e5b..9ae7664b4b49d4 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -757,8 +757,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, // IR files bypass the rest of initialization. 
if (Input.getKind().getLanguage() == Language::LLVM_IR) { - assert(hasIRSupport() && - "This action does not have IR file support!"); + if (!hasIRSupport()) { + CI.getDiagnostics().Report(diag::err_ast_action_on_llvm_ir) + << Input.getFile(); + return false; + } // Inform the diagnostic client we are processing a source file. CI.getDiagnosticClient().BeginSourceFile(CI.getLangOpts(), nullptr); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 4f44c3b7b89d4d..6bdd734e8a2752 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -389,8 +389,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Twine((unsigned)LangOpts.getHLSLVersion())); if (LangOpts.NativeHalfType) - Builder.defineMacro("__HLSL_ENABLE_16_BIT", - Twine((unsigned)LangOpts.getHLSLVersion())); + Builder.defineMacro("__HLSL_ENABLE_16_BIT", "1"); // Shader target information // "enums" for shader stages diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 32d96f81c4c8de..7d6febb04a82c4 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -31,6 +31,7 @@ #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" #include "clang/Sema/SemaCUDA.h" +#include "clang/Sema/SemaOpenACC.h" #include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaSYCL.h" #include "clang/Sema/TypoCorrection.h" @@ -2070,15 +2071,22 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (!LHS.isInvalid() && !HasError && !Length.isInvalid() && !Stride.isInvalid() && Tok.is(tok::r_square)) { if (ColonLocFirst.isValid() || ColonLocSecond.isValid()) { - // FIXME: OpenACC hasn't implemented Sema/Array section handling at a - // semantic level yet. For now, just reuse the OpenMP implementation - // as it gets the parsing/type management mostly right, and we can - // replace this call to ActOnOpenACCArraySectionExpr in the future. 
- // Eventually we'll genericize the OPenMPArraySectionExpr type as - // well. - LHS = Actions.OpenMP().ActOnOMPArraySectionExpr( - LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], - ColonLocFirst, ColonLocSecond, Length.get(), Stride.get(), RLoc); + // Like above, AllowOpenACCArraySections is 'more specific' and only + // enabled when actively parsing a 'var' in a 'var-list' during + // clause/'cache' construct parsing, so it is more specific. So we + // should do it first, so that the correct node gets created. + if (AllowOpenACCArraySections) { + assert(!Stride.isUsable() && !ColonLocSecond.isValid() && + "Stride/second colon not allowed for OpenACC"); + LHS = Actions.OpenACC().ActOnArraySectionExpr( + LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], + ColonLocFirst, Length.get(), RLoc); + } else { + LHS = Actions.OpenMP().ActOnOMPArraySectionExpr( + LHS.get(), Loc, ArgExprs.empty() ? nullptr : ArgExprs[0], + ColonLocFirst, ColonLocSecond, Length.get(), Stride.get(), + RLoc); + } } else { LHS = Actions.ActOnArraySubscriptExpr(getCurScope(), LHS.get(), Loc, ArgExprs, RLoc); diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 8a18fca8064ee1..29326f5d993a9d 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -327,7 +327,7 @@ OpenACCReductionOperator ParseReductionOperator(Parser &P) { return OpenACCReductionOperator::Max; if (ReductionKindTok.getIdentifierInfo()->isStr("min")) return OpenACCReductionOperator::Min; - LLVM_FALLTHROUGH; + [[fallthrough]]; default: P.Diag(ReductionKindTok, diag::err_acc_invalid_reduction_operator); return OpenACCReductionOperator::Invalid; @@ -945,7 +945,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( // the 'update' clause, so we have to handle it here. U se an assert to // make sure we get the right differentiator. 
assert(DirKind == OpenACCDirectiveKind::Update); - LLVM_FALLTHROUGH; + [[fallthrough]]; case OpenACCClauseKind::Attach: case OpenACCClauseKind::Copy: case OpenACCClauseKind::Delete: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7ef918970196e2..05e4c527f0b6e4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -18723,8 +18723,10 @@ void Sema::CheckArrayAccess(const Expr *expr) { expr = cast(expr)->getBase(); break; } - case Stmt::OMPArraySectionExprClass: { - const OMPArraySectionExpr *ASE = cast(expr); + case Stmt::ArraySectionExprClass: { + const ArraySectionExpr *ASE = cast(expr); + // FIXME: We should probably be checking all of the elements to the + // 'length' here as well. if (ASE->getLowerBound()) CheckArrayAccess(ASE->getBase(), ASE->getLowerBound(), /*ASE=*/nullptr, AllowOnePastEnd > 0); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 00384f9dc16aa0..c9dd6bb2413e38 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1314,7 +1314,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { // Some might be dependent for other reasons. 
case Expr::ArraySubscriptExprClass: case Expr::MatrixSubscriptExprClass: - case Expr::OMPArraySectionExprClass: + case Expr::ArraySectionExprClass: case Expr::OMPArrayShapingExprClass: case Expr::OMPIteratorExprClass: case Expr::BinaryOperatorClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 8d5b4199a681b9..71bb8f833d8997 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5069,11 +5069,18 @@ ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base, SourceLocation rbLoc) { if (base && !base->getType().isNull() && - base->hasPlaceholderType(BuiltinType::OMPArraySection)) - return OpenMP().ActOnOMPArraySectionExpr(base, lbLoc, ArgExprs.front(), - SourceLocation(), SourceLocation(), - /*Length*/ nullptr, - /*Stride=*/nullptr, rbLoc); + base->hasPlaceholderType(BuiltinType::ArraySection)) { + auto *AS = cast(base); + if (AS->isOMPArraySection()) + return OpenMP().ActOnOMPArraySectionExpr( + base, lbLoc, ArgExprs.front(), SourceLocation(), SourceLocation(), + /*Length*/ nullptr, + /*Stride=*/nullptr, rbLoc); + + return OpenACC().ActOnArraySectionExpr(base, lbLoc, ArgExprs.front(), + SourceLocation(), /*Length*/ nullptr, + rbLoc); + } // Since this might be a postfix expression, get rid of ParenListExprs. if (isa(base)) { @@ -6361,7 +6368,7 @@ static bool isPlaceholderToRemoveAsArg(QualType type) { case BuiltinType::BoundMember: case BuiltinType::BuiltinFn: case BuiltinType::IncompleteMatrixIdx: - case BuiltinType::OMPArraySection: + case BuiltinType::ArraySection: case BuiltinType::OMPArrayShaping: case BuiltinType::OMPIterator: return true; @@ -21343,8 +21350,9 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) { return ExprError(); // Expressions of unknown type. 
- case BuiltinType::OMPArraySection: - Diag(E->getBeginLoc(), diag::err_omp_array_section_use); + case BuiltinType::ArraySection: + Diag(E->getBeginLoc(), diag::err_array_section_use) + << cast(E)->isOMPArraySection(); return ExprError(); // Expressions of unknown type. diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 793e16df178914..003a157990d307 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -7753,9 +7753,9 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, break; } - case Stmt::OMPArraySectionExprClass: { + case Stmt::ArraySectionExprClass: { visitLocalsRetainedByInitializer(Path, - cast(Init)->getBase(), + cast(Init)->getBase(), Visit, true, EnableLifetimeWarnings); break; } diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index ba69e71e30a181..d5cfe82a5d7098 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -423,6 +423,21 @@ ExprResult SemaOpenACC::ActOnIntExpr(OpenACCDirectiveKind DK, return IntExpr; } +ExprResult SemaOpenACC::ActOnArraySectionExpr(Expr *Base, SourceLocation LBLoc, + Expr *LowerBound, + SourceLocation ColonLoc, + Expr *Length, + SourceLocation RBLoc) { + ASTContext &Context = getASTContext(); + + // TODO OpenACC: We likely have to reproduce a lot of the same logic from the + // OMP version of this, but at the moment we don't have a good way to test it, + // so for now we'll just create the node. 
+ return new (Context) + ArraySectionExpr(Base, LowerBound, Length, Context.ArraySectionTy, + VK_LValue, OK_Ordinary, ColonLoc, RBLoc); +} + bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc) { return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 4356d81bb9d7d4..7db09cdee9ef49 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2230,7 +2230,7 @@ bool SemaOpenMP::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, dyn_cast(Last->getAssociatedExpression()); if ((UO && UO->getOpcode() == UO_Deref) || isa(Last->getAssociatedExpression()) || - isa(Last->getAssociatedExpression()) || + isa(Last->getAssociatedExpression()) || isa(EI->getAssociatedExpression()) || isa(Last->getAssociatedExpression())) { IsVariableAssociatedWithSection = true; @@ -3884,7 +3884,7 @@ class DSAAttrChecker final : public StmtVisitor { MappableComponent &MC) { return MC.getAssociatedDeclaration() == nullptr && - (isa( + (isa( MC.getAssociatedExpression()) || isa( MC.getAssociatedExpression()) || @@ -4062,7 +4062,7 @@ class DSAAttrChecker final : public StmtVisitor { // Do both expressions have the same kind? 
if (CCI->getAssociatedExpression()->getStmtClass() != SC.getAssociatedExpression()->getStmtClass()) - if (!((isa( + if (!((isa( SC.getAssociatedExpression()) || isa( SC.getAssociatedExpression())) && @@ -5428,9 +5428,9 @@ static std::pair getPrivateItem(Sema &S, Expr *&RefExpr, Base = TempASE->getBase()->IgnoreParenImpCasts(); RefExpr = Base; IsArrayExpr = ArraySubscript; - } else if (auto *OASE = dyn_cast_or_null(RefExpr)) { + } else if (auto *OASE = dyn_cast_or_null(RefExpr)) { Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast(Base)) + while (auto *TempOASE = dyn_cast(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); while (auto *TempASE = dyn_cast(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); @@ -6060,10 +6060,10 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, // Array section - need to check for the mapping of the array section // element. QualType CanonType = E->getType().getCanonicalType(); - if (CanonType->isSpecificBuiltinType(BuiltinType::OMPArraySection)) { - const auto *OASE = cast(E->IgnoreParenImpCasts()); + if (CanonType->isSpecificBuiltinType(BuiltinType::ArraySection)) { + const auto *OASE = cast(E->IgnoreParenImpCasts()); QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); QualType ElemType; if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) ElemType = ATy->getElementType(); @@ -19534,7 +19534,7 @@ struct ReductionData { } // namespace static bool checkOMPArraySectionConstantForReduction( - ASTContext &Context, const OMPArraySectionExpr *OASE, bool &SingleElement, + ASTContext &Context, const ArraySectionExpr *OASE, bool &SingleElement, SmallVectorImpl &ArraySizes) { const Expr *Length = OASE->getLength(); if (Length == nullptr) { @@ -19561,7 +19561,7 @@ static bool checkOMPArraySectionConstantForReduction( // We require length = 1 for all array sections except the right-most to // 
guarantee that the memory region is contiguous and has no holes in it. - while (const auto *TempOASE = dyn_cast(Base)) { + while (const auto *TempOASE = dyn_cast(Base)) { Length = TempOASE->getLength(); if (Length == nullptr) { // For array sections of the form [1:] or [:], we would need to analyze @@ -19766,12 +19766,12 @@ static bool actOnOMPReductionKindClause( Expr *TaskgroupDescriptor = nullptr; QualType Type; auto *ASE = dyn_cast(RefExpr->IgnoreParens()); - auto *OASE = dyn_cast(RefExpr->IgnoreParens()); + auto *OASE = dyn_cast(RefExpr->IgnoreParens()); if (ASE) { Type = ASE->getType().getNonReferenceType(); } else if (OASE) { QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) Type = ATy->getElementType(); else @@ -21305,10 +21305,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPDependClause( // List items used in depend clauses cannot be zero-length array // sections. QualType ExprTy = RefExpr->getType().getNonReferenceType(); - const auto *OASE = dyn_cast(SimpleExpr); + const auto *OASE = dyn_cast(SimpleExpr); if (OASE) { QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (BaseType.isNull()) return nullptr; if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) @@ -21367,7 +21367,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPDependClause( Res = SemaRef.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RefExpr->IgnoreParenImpCasts()); } - if (!Res.isUsable() && !isa(SimpleExpr) && + if (!Res.isUsable() && !isa(SimpleExpr) && !isa(SimpleExpr)) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) << (getLangOpts().OpenMP >= 50 ? 
1 : 0) @@ -21468,7 +21468,7 @@ static bool checkTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef, static bool checkArrayExpressionDoesNotReferToWholeSize(Sema &SemaRef, const Expr *E, QualType BaseQTy) { - const auto *OASE = dyn_cast(E); + const auto *OASE = dyn_cast(E); // If this is an array subscript, it refers to the whole size if the size of // the dimension is constant and equals 1. Also, an array section assumes the @@ -21526,7 +21526,7 @@ static bool checkArrayExpressionDoesNotReferToWholeSize(Sema &SemaRef, static bool checkArrayExpressionDoesNotReferToUnitySize(Sema &SemaRef, const Expr *E, QualType BaseQTy) { - const auto *OASE = dyn_cast(E); + const auto *OASE = dyn_cast(E); // An array subscript always refer to a single element. Also, an array section // assumes the format of an array subscript if no colon is used. @@ -21741,14 +21741,14 @@ class MapBaseChecker final : public StmtVisitor { return RelevantExpr || Visit(E); } - bool VisitOMPArraySectionExpr(OMPArraySectionExpr *OASE) { + bool VisitArraySectionExpr(ArraySectionExpr *OASE) { // After OMP 5.0 Array section in reduction clause will be implicitly // mapped assert(!(SemaRef.getLangOpts().OpenMP < 50 && NoDiagnose) && "Array sections cannot be implicitly mapped."); Expr *E = OASE->getBase()->IgnoreParenImpCasts(); QualType CurType = - OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); + ArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1] // If the type of a list item is a reference to a type T then the type @@ -21921,7 +21921,7 @@ static const Expr *checkMapClauseExpressionBase( auto CE = CurComponents.rend(); for (; CI != CE; ++CI) { const auto *OASE = - dyn_cast(CI->getAssociatedExpression()); + dyn_cast(CI->getAssociatedExpression()); if (!OASE) continue; if (OASE && OASE->getLength()) @@ -21991,10 +21991,10 @@ static bool checkMapConflicts( // variable in map clauses of the same construct. 
if (CurrentRegionOnly && (isa(CI->getAssociatedExpression()) || - isa(CI->getAssociatedExpression()) || + isa(CI->getAssociatedExpression()) || isa(CI->getAssociatedExpression())) && (isa(SI->getAssociatedExpression()) || - isa(SI->getAssociatedExpression()) || + isa(SI->getAssociatedExpression()) || isa(SI->getAssociatedExpression()))) { SemaRef.Diag(CI->getAssociatedExpression()->getExprLoc(), diag::err_omp_multiple_array_items_in_map_clause) @@ -22022,11 +22022,10 @@ static bool checkMapConflicts( if (const auto *ASE = dyn_cast(SI->getAssociatedExpression())) { Type = ASE->getBase()->IgnoreParenImpCasts()->getType(); - } else if (const auto *OASE = dyn_cast( + } else if (const auto *OASE = dyn_cast( SI->getAssociatedExpression())) { const Expr *E = OASE->getBase()->IgnoreParenImpCasts(); - Type = - OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); + Type = ArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); } else if (const auto *OASE = dyn_cast( SI->getAssociatedExpression())) { Type = OASE->getBase()->getType()->getPointeeType(); @@ -22501,13 +22500,13 @@ static void checkMappableExpressionList( (void)I; QualType Type; auto *ASE = dyn_cast(VE->IgnoreParens()); - auto *OASE = dyn_cast(VE->IgnoreParens()); + auto *OASE = dyn_cast(VE->IgnoreParens()); auto *OAShE = dyn_cast(VE->IgnoreParens()); if (ASE) { Type = ASE->getType().getNonReferenceType(); } else if (OASE) { QualType BaseType = - OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + ArraySectionExpr::getBaseOriginalType(OASE->getBase()); if (const auto *ATy = BaseType->getAsArrayTypeUnsafe()) Type = ATy->getElementType(); else @@ -23976,7 +23975,7 @@ SemaOpenMP::ActOnOpenMPUseDeviceAddrClause(ArrayRef VarList, MVLI.VarBaseDeclarations.push_back(D); MVLI.VarComponents.emplace_back(); Expr *Component = SimpleRefExpr; - if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || + if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || isa(RefExpr->IgnoreParenImpCasts()))) Component = 
SemaRef.DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); @@ -24126,7 +24125,7 @@ SemaOpenMP::ActOnOpenMPHasDeviceAddrClause(ArrayRef VarList, // against other clauses later on. Expr *Component = SimpleRefExpr; auto *VD = dyn_cast(D); - if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || + if (VD && (isa(RefExpr->IgnoreParenImpCasts()) || isa(RefExpr->IgnoreParenImpCasts()))) Component = SemaRef.DefaultFunctionArrayLvalueConversion(SimpleRefExpr).get(); @@ -24540,7 +24539,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPAffinityClause( Sema::TentativeAnalysisScope Trap(SemaRef); Res = SemaRef.CreateBuiltinUnaryOp(ELoc, UO_AddrOf, SimpleExpr); } - if (!Res.isUsable() && !isa(SimpleExpr) && + if (!Res.isUsable() && !isa(SimpleExpr) && !isa(SimpleExpr)) { Diag(ELoc, diag::err_omp_expected_addressable_lvalue_or_array_item) << 1 << 0 << RefExpr->getSourceRange(); @@ -24653,7 +24652,7 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( Expr *Stride, SourceLocation RBLoc) { ASTContext &Context = getASTContext(); if (Base->hasPlaceholderType() && - !Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { + !Base->hasPlaceholderType(BuiltinType::ArraySection)) { ExprResult Result = SemaRef.CheckPlaceholderExpr(Base); if (Result.isInvalid()) return ExprError(); @@ -24693,13 +24692,13 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( (LowerBound->isTypeDependent() || LowerBound->isValueDependent())) || (Length && (Length->isTypeDependent() || Length->isValueDependent())) || (Stride && (Stride->isTypeDependent() || Stride->isValueDependent()))) { - return new (Context) OMPArraySectionExpr( + return new (Context) ArraySectionExpr( Base, LowerBound, Length, Stride, Context.DependentTy, VK_LValue, OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); } // Perform default conversions. 
- QualType OriginalTy = OMPArraySectionExpr::getBaseOriginalType(Base); + QualType OriginalTy = ArraySectionExpr::getBaseOriginalType(Base); QualType ResultTy; if (OriginalTy->isAnyPointerType()) { ResultTy = OriginalTy->getPointeeType(); @@ -24822,14 +24821,14 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr( } } - if (!Base->hasPlaceholderType(BuiltinType::OMPArraySection)) { + if (!Base->hasPlaceholderType(BuiltinType::ArraySection)) { ExprResult Result = SemaRef.DefaultFunctionArrayLvalueConversion(Base); if (Result.isInvalid()) return ExprError(); Base = Result.get(); } - return new (Context) OMPArraySectionExpr( - Base, LowerBound, Length, Stride, Context.OMPArraySectionTy, VK_LValue, + return new (Context) ArraySectionExpr( + Base, LowerBound, Length, Stride, Context.ArraySectionTy, VK_LValue, OK_Ordinary, ColonLocFirst, ColonLocSecond, RBLoc); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1d30ba31e17940..f47bc219e6fa32 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -2784,15 +2784,23 @@ class TreeTransform { /// /// By default, performs semantic analysis to build the new expression. /// Subclasses may override this routine to provide different behavior. 
- ExprResult RebuildOMPArraySectionExpr(Expr *Base, SourceLocation LBracketLoc, - Expr *LowerBound, - SourceLocation ColonLocFirst, - SourceLocation ColonLocSecond, - Expr *Length, Expr *Stride, - SourceLocation RBracketLoc) { - return getSema().OpenMP().ActOnOMPArraySectionExpr( - Base, LBracketLoc, LowerBound, ColonLocFirst, ColonLocSecond, Length, - Stride, RBracketLoc); + ExprResult RebuildArraySectionExpr(bool IsOMPArraySection, Expr *Base, + SourceLocation LBracketLoc, + Expr *LowerBound, + SourceLocation ColonLocFirst, + SourceLocation ColonLocSecond, + Expr *Length, Expr *Stride, + SourceLocation RBracketLoc) { + if (IsOMPArraySection) + return getSema().OpenMP().ActOnOMPArraySectionExpr( + Base, LBracketLoc, LowerBound, ColonLocFirst, ColonLocSecond, Length, + Stride, RBracketLoc); + + assert(Stride == nullptr && !ColonLocSecond.isValid() && + "Stride/second colon not allowed for OpenACC"); + + return getSema().OpenACC().ActOnArraySectionExpr( + Base, LBracketLoc, LowerBound, ColonLocFirst, Length, RBracketLoc); } /// Build a new array shaping expression. 
@@ -11742,7 +11750,7 @@ TreeTransform::TransformMatrixSubscriptExpr(MatrixSubscriptExpr *E) { template ExprResult -TreeTransform::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) { +TreeTransform::TransformArraySectionExpr(ArraySectionExpr *E) { ExprResult Base = getDerived().TransformExpr(E->getBase()); if (Base.isInvalid()) return ExprError(); @@ -11762,20 +11770,25 @@ TreeTransform::TransformOMPArraySectionExpr(OMPArraySectionExpr *E) { } ExprResult Stride; - if (Expr *Str = E->getStride()) { - Stride = getDerived().TransformExpr(Str); - if (Stride.isInvalid()) - return ExprError(); + if (E->isOMPArraySection()) { + if (Expr *Str = E->getStride()) { + Stride = getDerived().TransformExpr(Str); + if (Stride.isInvalid()) + return ExprError(); + } } if (!getDerived().AlwaysRebuild() && Base.get() == E->getBase() && - LowerBound.get() == E->getLowerBound() && Length.get() == E->getLength()) + LowerBound.get() == E->getLowerBound() && + Length.get() == E->getLength() && + (E->isOpenACCArraySection() || Stride.get() == E->getStride())) return E; - return getDerived().RebuildOMPArraySectionExpr( - Base.get(), E->getBase()->getEndLoc(), LowerBound.get(), - E->getColonLocFirst(), E->getColonLocSecond(), Length.get(), Stride.get(), - E->getRBracketLoc()); + return getDerived().RebuildArraySectionExpr( + E->isOMPArraySection(), Base.get(), E->getBase()->getEndLoc(), + LowerBound.get(), E->getColonLocFirst(), + E->isOMPArraySection() ? 
E->getColonLocSecond() : SourceLocation{}, + Length.get(), Stride.get(), E->getRBracketLoc()); } template diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index f8d54c0c398906..e017f5bdb48858 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -261,8 +261,8 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { case BuiltinType::IncompleteMatrixIdx: ID = PREDEF_TYPE_INCOMPLETE_MATRIX_IDX; break; - case BuiltinType::OMPArraySection: - ID = PREDEF_TYPE_OMP_ARRAY_SECTION; + case BuiltinType::ArraySection: + ID = PREDEF_TYPE_ARRAY_SECTION; break; case BuiltinType::OMPArrayShaping: ID = PREDEF_TYPE_OMP_ARRAY_SHAPING; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index c99d6ed1c36c88..0ef57a3ea804ef 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7385,11 +7385,11 @@ QualType ASTReader::GetType(TypeID ID) { case PREDEF_TYPE_INCOMPLETE_MATRIX_IDX: T = Context.IncompleteMatrixIdxTy; break; - case PREDEF_TYPE_OMP_ARRAY_SECTION: - T = Context.OMPArraySectionTy; + case PREDEF_TYPE_ARRAY_SECTION: + T = Context.ArraySectionTy; break; case PREDEF_TYPE_OMP_ARRAY_SHAPING: - T = Context.OMPArraySectionTy; + T = Context.OMPArrayShapingTy; break; case PREDEF_TYPE_OMP_ITERATOR: T = Context.OMPIteratorTy; diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index baded0fe19831f..7d3930022a69c0 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -956,14 +956,22 @@ void ASTStmtReader::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { E->setRBracketLoc(readSourceLocation()); } -void ASTStmtReader::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) { +void ASTStmtReader::VisitArraySectionExpr(ArraySectionExpr *E) { VisitExpr(E); + E->ASType = Record.readEnum(); + E->setBase(Record.readSubExpr()); 
E->setLowerBound(Record.readSubExpr()); E->setLength(Record.readSubExpr()); - E->setStride(Record.readSubExpr()); + + if (E->isOMPArraySection()) + E->setStride(Record.readSubExpr()); + E->setColonLocFirst(readSourceLocation()); - E->setColonLocSecond(readSourceLocation()); + + if (E->isOMPArraySection()) + E->setColonLocSecond(readSourceLocation()); + E->setRBracketLoc(readSourceLocation()); } @@ -3090,8 +3098,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = new (Context) MatrixSubscriptExpr(Empty); break; - case EXPR_OMP_ARRAY_SECTION: - S = new (Context) OMPArraySectionExpr(Empty); + case EXPR_ARRAY_SECTION: + S = new (Context) ArraySectionExpr(Empty); break; case EXPR_OMP_ARRAY_SHAPING: diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index cd5f733baf76f4..39aec31b6d8790 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -880,16 +880,21 @@ void ASTStmtWriter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { Code = serialization::EXPR_ARRAY_SUBSCRIPT; } -void ASTStmtWriter::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) { +void ASTStmtWriter::VisitArraySectionExpr(ArraySectionExpr *E) { VisitExpr(E); + Record.writeEnum(E->ASType); Record.AddStmt(E->getBase()); Record.AddStmt(E->getLowerBound()); Record.AddStmt(E->getLength()); - Record.AddStmt(E->getStride()); + if (E->isOMPArraySection()) + Record.AddStmt(E->getStride()); Record.AddSourceLocation(E->getColonLocFirst()); - Record.AddSourceLocation(E->getColonLocSecond()); + + if (E->isOMPArraySection()) + Record.AddSourceLocation(E->getColonLocSecond()); + Record.AddSourceLocation(E->getRBracketLoc()); - Code = serialization::EXPR_OMP_ARRAY_SECTION; + Code = serialization::EXPR_ARRAY_SECTION; } void ASTStmtWriter::VisitOMPArrayShapingExpr(OMPArrayShapingExpr *E) { diff --git a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp 
index a678c3827e7f12..1cebfbbee77dae 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp @@ -188,9 +188,9 @@ void DereferenceChecker::reportBug(DerefKind K, ProgramStateRef State, os << DerefStr1; break; } - case Stmt::OMPArraySectionExprClass: { + case Stmt::ArraySectionExprClass: { os << "Array access"; - const OMPArraySectionExpr *AE = cast(S); + const ArraySectionExpr *AE = cast(S); AddDerefSource(os, Ranges, AE->getBase()->IgnoreParenCasts(), State.get(), N->getLocationContext()); os << DerefStr1; diff --git a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp index 1cf81b54e77d32..7ac34ef8164e4c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp @@ -350,7 +350,7 @@ static bool isIdenticalStmt(const ASTContext &Ctx, const Stmt *Stmt1, return false; case Stmt::CallExprClass: case Stmt::ArraySubscriptExprClass: - case Stmt::OMPArraySectionExprClass: + case Stmt::ArraySectionExprClass: case Stmt::OMPArrayShapingExprClass: case Stmt::OMPIteratorExprClass: case Stmt::ImplicitCastExprClass: diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 09c69f9612d96b..0b1edf3e5c96bf 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1948,7 +1948,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::CXXPseudoDestructorExprClass: case Stmt::SubstNonTypeTemplateParmExprClass: case Stmt::CXXNullPtrLiteralExprClass: - case Stmt::OMPArraySectionExprClass: + case Stmt::ArraySectionExprClass: case Stmt::OMPArrayShapingExprClass: case Stmt::OMPIteratorExprClass: case Stmt::SYCLUniqueStableNameExprClass: diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp 
b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index 32850f5eea92a9..0c047b6c5da2f8 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -439,6 +439,9 @@ class DependencyScanningAction : public tooling::ToolAction { if (Result) setLastCC1Arguments(std::move(OriginalInvocation)); + // Propagate the statistics to the parent FileManager. + DriverFileMgr->AddStats(ScanInstance.getFileManager()); + return Result; } diff --git a/clang/test/CodeGen/X86/ms-x86-intrinsics.c b/clang/test/CodeGen/X86/ms-x86-intrinsics.c index a1c90d71c8ebf5..aa557c8e19a83d 100644 --- a/clang/test/CodeGen/X86/ms-x86-intrinsics.c +++ b/clang/test/CodeGen/X86/ms-x86-intrinsics.c @@ -48,7 +48,7 @@ long long test__readfsqword(unsigned long Offset) { __int64 test__emul(int a, int b) { return __emul(a, b); } -// CHECK-LABEL: define dso_local i64 @test__emul(i32 noundef %a, i32 noundef %b) +// CHECK-LABEL: define dso_local range(i64 -4611686016279904256, 4611686018427387905) i64 @test__emul(i32 noundef %a, i32 noundef %b) // CHECK: [[X:%[0-9]+]] = sext i32 %a to i64 // CHECK: [[Y:%[0-9]+]] = sext i32 %b to i64 // CHECK: [[RES:%[0-9]+]] = mul nsw i64 [[Y]], [[X]] @@ -57,7 +57,7 @@ __int64 test__emul(int a, int b) { unsigned __int64 test__emulu(unsigned int a, unsigned int b) { return __emulu(a, b); } -// CHECK-LABEL: define dso_local i64 @test__emulu(i32 noundef %a, i32 noundef %b) +// CHECK-LABEL: define dso_local range(i64 0, -8589934590) i64 @test__emulu(i32 noundef %a, i32 noundef %b) // CHECK: [[X:%[0-9]+]] = zext i32 %a to i64 // CHECK: [[Y:%[0-9]+]] = zext i32 %b to i64 // CHECK: [[RES:%[0-9]+]] = mul nuw i64 [[Y]], [[X]] @@ -108,13 +108,13 @@ long long test__readgsqword(unsigned long Offset) { __int64 test__mulh(__int64 a, __int64 b) { return __mulh(a, b); } -// CHECK-X64-LABEL: define dso_local i64 @test__mulh(i64 noundef %a, i64 noundef %b) +// CHECK-X64-LABEL: define 
dso_local range(i64 -4611686018427387904, 4611686018427387905) i64 @test__mulh(i64 noundef %a, i64 noundef %b) // CHECK-X64: = mul nsw i128 % unsigned __int64 test__umulh(unsigned __int64 a, unsigned __int64 b) { return __umulh(a, b); } -// CHECK-X64-LABEL: define dso_local i64 @test__umulh(i64 noundef %a, i64 noundef %b) +// CHECK-X64-LABEL: define dso_local range(i64 0, -1) i64 @test__umulh(i64 noundef %a, i64 noundef %b) // CHECK-X64: = mul nuw i128 % __int64 test_mul128(__int64 Multiplier, diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index 1fb39f9a346667..de30a00138ac80 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -66,7 +66,7 @@ struct anon_struct { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10:[0-9]+]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10:[0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 @@ -114,7 +114,7 @@ void test1(struct annotated *p, int index, int val) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], [[INDEX]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 @@ -158,7 +158,7 @@ void test2(struct annotated *p, size_t index) { p->array[index] = __builtin_dynamic_object_size(p->array, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test2_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -169,7 +169,7 @@ void test2(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 0 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP3]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test2_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -203,7 +203,7 @@ size_t test2_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], [[INDEX]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: 
tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12 @@ -257,7 +257,7 @@ void test3(struct annotated *p, size_t index) { p->array[index] = __builtin_dynamic_object_size(p, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -270,7 +270,7 @@ void test3(struct annotated *p, size_t index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 0 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP5]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test3_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 8589934601) i64 @test3_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -308,7 +308,7 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void 
@__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont4: // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], 2 @@ -325,7 +325,7 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = icmp ult i64 [[IDXPROM13]], [[TMP6]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP7]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds16: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM13]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM13]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont20: // SANITIZE-WITH-ATTR-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD7]], 3 @@ -342,7 +342,7 @@ size_t test3_bdos(struct annotated *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP13:%.*]] = icmp ult i64 [[IDXPROM30]], [[TMP12]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP13]], label [[CONT37:%.*]], label [[HANDLER_OUT_OF_BOUNDS33:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds33: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM30]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM30]]) #[[ATTR10]], 
!nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont37: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM30]] @@ -441,7 +441,7 @@ void test4(struct annotated *p, int index, int fam_idx) { p->array[index + 2] = (unsigned char)__builtin_dynamic_object_size(&(p->array[fam_idx]), 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test4_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -456,7 +456,7 @@ void test4(struct annotated *p, int index, int fam_idx) { // SANITIZE-WITH-ATTR-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 [[TMP3]], i64 0 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP7]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test4_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -494,7 +494,7 @@ size_t test4_bdos(struct annotated *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) 
#[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 @@ -545,7 +545,7 @@ void test5(struct anon_struct *p, int index) { p->array[index] = __builtin_dynamic_object_size(p, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 16, 1) i64 @test5_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -556,7 +556,7 @@ void test5(struct anon_struct *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = select i1 [[DOTINV]], i64 0, i64 [[TMP1]] // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP2]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test5_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 16, 1) i64 @test5_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -590,7 +590,7 @@ size_t test5_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], 
!nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 @@ -683,7 +683,7 @@ size_t test6_bdos(struct anon_struct *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont7: // SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9 @@ -756,7 +756,7 @@ size_t test7_bdos(struct union_of_fams *p) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT9:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont9: 
// SANITIZE-WITH-ATTR-NEXT: [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9 @@ -797,7 +797,7 @@ void test8(struct union_of_fams *p, int index) { p->ints[index] = __builtin_dynamic_object_size(p->ints, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test8_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -805,7 +805,7 @@ void test8(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext i8 [[DOT_COUNTED_BY_LOAD]] to i64 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP0]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test8_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -955,7 +955,7 @@ void test10(struct union_of_fams *p, int index) { p->bytes[index] = (unsigned char)__builtin_dynamic_object_size(p->bytes, 1); } -// SANITIZE-WITH-ATTR-LABEL: define dso_local i64 @test10_bdos( +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -2147483648, 2147483648) i64 @test10_bdos( // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // SANITIZE-WITH-ATTR-NEXT: entry: // SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -964,7 +964,7 @@ void test10(struct union_of_fams *p, int index) { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NARROW]] to i64 // SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP0]] // -// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i64 
@test10_bdos( +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -2147483648, 2147483648) i64 @test10_bdos( // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] { // NO-SANITIZE-WITH-ATTR-NEXT: entry: // NO-SANITIZE-WITH-ATTR-NEXT: [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 @@ -1095,10 +1095,10 @@ int test12_a, test12_b; // SANITIZE-WITH-ATTR-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0 // SANITIZE-WITH-ATTR-NEXT: br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds4: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB19:[0-9]+]], i64 0) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 0) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.type_mismatch6: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT_ANON_5:%.*]], ptr @test12_foo, i64 1, i32 0, i32 0, i32 0) to i64)) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT_ANON_5:%.*]], ptr @test12_foo, i64 1, i32 0, i32 0, i32 0) to i64)) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12( @@ -1188,7 +1188,7 @@ struct test13_bar { // SANITIZE-WITH-ATTR-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], [[INDEX]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP2]], label 
[[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB23:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[INDEX]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont5: // SANITIZE-WITH-ATTR-NEXT: [[REVMAP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16 @@ -1249,7 +1249,7 @@ struct test14_foo { // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: trap: // SANITIZE-WITH-ATTR-NEXT: tail call void @llvm.trap() #[[ATTR10]] @@ -1305,7 +1305,7 @@ int test14(int idx) { // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: // SANITIZE-WITH-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull 
@[[GLOB27:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: trap: // SANITIZE-WITH-ATTR-NEXT: tail call void @llvm.trap() #[[ATTR10]] @@ -1326,7 +1326,7 @@ int test14(int idx) { // SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: // SANITIZE-WITHOUT-ATTR-NEXT: [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64 -// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB10:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: trap: // SANITIZE-WITHOUT-ATTR-NEXT: tail call void @llvm.trap() #[[ATTR8]] @@ -1487,7 +1487,7 @@ struct tests_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB26:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont4: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84 @@ -1528,7 +1528,7 @@ int test24(int c, struct tests_foo *var) { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10 
// SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB27:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 10) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont5: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44 @@ -1580,7 +1580,7 @@ struct test26_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont5: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 8 @@ -1651,7 +1651,7 @@ struct test27_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], 
!nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ENTRIES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 24 @@ -1717,7 +1717,7 @@ struct test28_foo { // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB31:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont17: // SANITIZE-WITH-ATTR-NEXT: [[ARR:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 12 @@ -1779,7 +1779,7 @@ struct annotated_struct_array { // SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB33:[0-9]+]], i64 [[TMP1]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB36:[0-9]+]], i64 [[TMP1]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont3: // SANITIZE-WITH-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]] @@ -1791,7 +1791,7 @@ struct annotated_struct_array { // SANITIZE-WITH-ATTR-NEXT: [[TMP4:%.*]] = icmp ult i64 [[IDXPROM15]], [[TMP3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: br i1 [[TMP4]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR: handler.out_of_bounds16: -// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR10]], !nosanitize [[META2]] +// SANITIZE-WITH-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR10]], !nosanitize [[META2]] // SANITIZE-WITH-ATTR-NEXT: unreachable, !nosanitize [[META2]] // SANITIZE-WITH-ATTR: cont20: // SANITIZE-WITH-ATTR-NEXT: [[ARRAY:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 12 @@ -1826,7 +1826,7 @@ struct annotated_struct_array { // SANITIZE-WITHOUT-ATTR-NEXT: [[TMP1:%.*]] = zext i32 [[IDX1]] to i64 // SANITIZE-WITHOUT-ATTR-NEXT: br i1 [[TMP0]], label [[CONT21:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: handler.out_of_bounds: -// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] +// SANITIZE-WITHOUT-ATTR-NEXT: tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR-NEXT: unreachable, !nosanitize [[META9]] // SANITIZE-WITHOUT-ATTR: cont21: // SANITIZE-WITHOUT-ATTR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]] diff --git a/clang/test/CodeGen/ms-mixed-ptr-sizes.c b/clang/test/CodeGen/ms-mixed-ptr-sizes.c index 89d05fd30b72c2..51bea60eb39dce 100644 --- a/clang/test/CodeGen/ms-mixed-ptr-sizes.c +++ 
b/clang/test/CodeGen/ms-mixed-ptr-sizes.c @@ -49,7 +49,7 @@ void test_other(struct Foo *f, __attribute__((address_space(10))) int *i) { } int test_compare1(int *__ptr32 __uptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare1 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare1 // X64: %{{.+}} = addrspacecast ptr %j to ptr addrspace(271) // X64: %cmp = icmp eq ptr addrspace(271) %{{.+}}, %i // X86: %{{.+}} = addrspacecast ptr addrspace(272) %j to ptr addrspace(271) @@ -58,7 +58,7 @@ int test_compare1(int *__ptr32 __uptr i, int *__ptr64 j) { } int test_compare2(int *__ptr32 __sptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare2 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare2 // X64: %{{.+}} = addrspacecast ptr %j to ptr addrspace(270) // X64: %cmp = icmp eq ptr addrspace(270) %{{.+}}, %i // X86: %{{.+}} = addrspacecast ptr addrspace(272) %j to ptr @@ -67,7 +67,7 @@ int test_compare2(int *__ptr32 __sptr i, int *__ptr64 j) { } int test_compare3(int *__ptr32 __uptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare3 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare3 // X64: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr // X64: %cmp = icmp eq ptr %{{.+}}, %j // X86: %{{.+}} = addrspacecast ptr addrspace(271) %i to ptr addrspace(272) @@ -76,7 +76,7 @@ int test_compare3(int *__ptr32 __uptr i, int *__ptr64 j) { } int test_compare4(int *__ptr32 __sptr i, int *__ptr64 j) { - // ALL-LABEL: define dso_local noundef i32 @test_compare4 + // ALL-LABEL: define dso_local range(i32 0, 2) i32 @test_compare4 // X64: %{{.+}} = addrspacecast ptr addrspace(270) %i to ptr // X64: %cmp = icmp eq ptr %{{.+}}, %j // X86: %{{.+}} = addrspacecast ptr %i to ptr addrspace(272) diff --git a/clang/test/CodeGenObjCXX/msabi-stret-arm64.mm b/clang/test/CodeGenObjCXX/msabi-stret-arm64.mm new file mode 100644 index 00000000000000..3bbdbebc5cb576 --- 
/dev/null +++ b/clang/test/CodeGenObjCXX/msabi-stret-arm64.mm @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple aarch64-pc-windows-msvc -fobjc-runtime=gnustep-2.2 -fobjc-dispatch-method=non-legacy -emit-llvm -o - %s | FileCheck %s + +// Pass and return for type size <= 8 bytes. +struct S1 { + int a[2]; +}; + +// Pass and return hfa <= 8 bytes +struct F1 { + float a[2]; +}; + +// Pass and return for type size > 16 bytes. +struct S2 { + int a[5]; +}; + +// Pass and return aggregate (of size < 16 bytes) with non-trivial destructor. +// Sret and inreg: Returned in x0 +struct S3 { + int a[3]; + ~S3(); +}; +S3::~S3() { +} + + +@interface MsgTest { id isa; } @end +@implementation MsgTest +- (S1) smallS1 { + S1 x; + x.a[0] = 0; + x.a[1] = 1; + return x; + +} +- (F1) smallF1 { + F1 x; + x.a[0] = 0.2f; + x.a[1] = 0.5f; + return x; +} +- (S2) stretS2 { + S2 x; + for (int i = 0; i < 5; i++) { + x.a[i] = i; + } + return x; +} +- (S3) stretInRegS3 { + S3 x; + for (int i = 0; i < 3; i++) { + x.a[i] = i; + } + return x; +} ++ (S3) msgTestStretInRegS3 { + S3 x; + for (int i = 0; i < 3; i++) { + x.a[i] = i; + } + return x; +} +@end + +void test0(MsgTest *t) { + // CHECK: call {{.*}} @objc_msgSend + S1 ret = [t smallS1]; + // CHECK: call {{.*}} @objc_msgSend + F1 ret2 = [t smallF1]; + // CHECK: call {{.*}} @objc_msgSend_stret + S2 ret3 = [t stretS2]; + // CHECK: call {{.*}} @objc_msgSend_stret2 + S3 ret4 = [t stretInRegS3]; + // CHECK: call {{.*}} @objc_msgSend_stret2 + S3 ret5 = [MsgTest msgTestStretInRegS3]; +} diff --git a/clang/test/Driver/default-denormal-fp-math.c b/clang/test/Driver/default-denormal-fp-math.c index 5f87e151df49e4..c04ad5c08b8d0d 100644 --- a/clang/test/Driver/default-denormal-fp-math.c +++ b/clang/test/Driver/default-denormal-fp-math.c @@ -3,15 +3,6 @@ // RUN: %clang -### -target x86_64-unknown-linux-gnu --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s -// crtfastmath enables ftz and daz -// RUN: %clang -### -target 
x86_64-unknown-linux-gnu -ffast-math --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s - -// crt not linked in with nostartfiles -// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math -nostartfiles --sysroot=%S/Inputs/basic_linux_tree -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s - -// If there's no crtfastmath, don't assume ftz/daz -// RUN: %clang -### -target x86_64-unknown-linux-gnu -ffast-math --sysroot=/dev/null -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-IEEE %s - // RUN: %clang -### -target x86_64-scei-ps4 -c %s -v 2>&1 | FileCheck -check-prefix=CHECK-PRESERVESIGN %s // Flag omitted for default diff --git a/clang/test/Driver/integrated-as.c b/clang/test/Driver/integrated-as.c index e78fde873cf47f..b0a26f6011b0c7 100644 --- a/clang/test/Driver/integrated-as.c +++ b/clang/test/Driver/integrated-as.c @@ -3,7 +3,7 @@ // RUN: %clang -### -c -save-temps -integrated-as --target=x86_64 %s 2>&1 | FileCheck %s // CHECK: cc1as -// CHECK: -mrelax-all +// CHECK-NOT: -mrelax-all // RISC-V does not enable -mrelax-all // RUN: %clang -### -c -save-temps -integrated-as --target=riscv64 %s 2>&1 | FileCheck %s -check-prefix=RISCV-RELAX diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index d918f4f2d7dbd9..958e682b6c3c11 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -1446,6 +1446,32 @@ // RUN: %clang --target=i386-unknown-linux -no-pie -### %s -ffast-math \ // RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// Don't link crtfastmath.o with -shared +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -shared \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -Ofast -shared \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck 
--check-prefix=CHECK-NOCRTFASTMATH %s +// Check for effects of -mdaz-ftz +// RUN: %clang --target=x86_64-unknown-linux -### %s -ffast-math -shared -mdaz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -mdaz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -mdaz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -### %s -ffast-math -shared -mno-daz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -mno-daz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -mno-daz-ftz \ +// RUN: --sysroot=%S/Inputs/basic_linux_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s // CHECK-CRTFASTMATH: usr/lib/gcc/x86_64-unknown-linux/10.2.0{{/|\\\\}}crtfastmath.o // CHECK-NOCRTFASTMATH-NOT: crtfastmath.o diff --git a/clang/test/Frontend/ast-dump-on-llvm.ll b/clang/test/Frontend/ast-dump-on-llvm.ll new file mode 100644 index 00000000000000..cdacfde4ba848c --- /dev/null +++ b/clang/test/Frontend/ast-dump-on-llvm.ll @@ -0,0 +1,29 @@ +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump=json %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-EQ-JSON +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump=default %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-EQ-DEFAULT +; 
RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-all %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-ALL +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-all=json %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-ALL-EQ-JSON +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-all=default %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-ALL-EQ-DEFAULT + +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-print %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-PRINT +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-view %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-VIEW +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-list %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-LIST +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-lookups %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-LOOKUP +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-filter=FunctionDecl %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-FILTER-EQ +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -ast-dump-decl-types %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-AST-DUMP-DECL-TYPES +; RUN: not %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-SYNTAX-ONLY + + +; CHECK-AST-DUMP: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-EQ-JSON: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-EQ-DEFAULT: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-ALL: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-ALL-EQ-JSON: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-ALL-EQ-DEFAULT: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-PRINT: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-VIEW: 
fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-LIST: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-LOOKUP: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-FILTER-EQ: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-AST-DUMP-DECL-TYPES: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' +; CHECK-SYNTAX-ONLY: fatal error: cannot apply AST actions to LLVM IR file '{{.*}}' diff --git a/clang/test/OpenMP/task_depend_messages.cpp b/clang/test/OpenMP/task_depend_messages.cpp index 388595bef4de1b..3f39c55527b5d4 100644 --- a/clang/test/OpenMP/task_depend_messages.cpp +++ b/clang/test/OpenMP/task_depend_messages.cpp @@ -62,7 +62,7 @@ int main(int argc, char **argv, char *env[]) { #pragma omp task depend(in : argv[ : argc][1 : argc - 1]) #pragma omp task depend(in : arr[0]) #pragma omp task depend(depobj:argc) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp50-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} omp51-error {{expected lvalue expression of 'omp_depend_t' type, not 'int'}} - #pragma omp task depend(depobj : argv[ : argc][1 : argc - 1]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp50-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} omp51-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} + #pragma omp task depend(depobj : argv[ : argc][1 : argc - 1]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} omp50-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} omp51-error {{expected lvalue expression of 'omp_depend_t' type, not ''}} #pragma omp task depend(depobj : arr[0]) // omp45-error {{expected 'in', 'out', 'inout' or 'mutexinoutset' in OpenMP clause 'depend'}} #pragma omp task depend(in : ([ // expected-error {{expected 
variable name or 'this' in lambda capture list}} expected-error {{expected ')'}} expected-note {{to match this '('}} #pragma omp task depend(in : ([] // expected-error {{expected body of lambda expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} diff --git a/clang/test/ParserOpenACC/parse-cache-construct.cpp b/clang/test/ParserOpenACC/parse-cache-construct.cpp index f0a35824696d8c..1ab2153a68be8d 100644 --- a/clang/test/ParserOpenACC/parse-cache-construct.cpp +++ b/clang/test/ParserOpenACC/parse-cache-construct.cpp @@ -74,12 +74,12 @@ void use() { for (int i = 0; i < 10; ++i) { // FIXME: Once we have a new array-section type to represent OpenACC as // well, change this error message. - // expected-error@+2{{OpenMP array section is not allowed here}} + // expected-error@+2{{OpenACC sub-array is not allowed here}} // expected-warning@+1{{OpenACC construct 'cache' not yet implemented, pragma ignored}} #pragma acc cache(Arrs.MemArr[3:4].array[1:4]) } for (int i = 0; i < 10; ++i) { - // expected-error@+2{{OpenMP array section is not allowed here}} + // expected-error@+2{{OpenACC sub-array is not allowed here}} // expected-warning@+1{{OpenACC construct 'cache' not yet implemented, pragma ignored}} #pragma acc cache(Arrs.MemArr[3:4].array[4]) } diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index 799f22b8c120e5..ee2cb2d1501dea 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -482,13 +482,13 @@ void VarListClauses() { #pragma acc serial copy(HasMem.MemArr[3].array[1:4]), seq for(;;){} - // expected-error@+3{{OpenMP array section is not allowed here}} + // expected-error@+3{{OpenACC sub-array is not allowed here}} // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[1:3].array[1]), seq 
for(;;){} - // expected-error@+3{{OpenMP array section is not allowed here}} + // expected-error@+3{{OpenACC sub-array is not allowed here}} // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[1:3].array[1:2]), seq diff --git a/clang/test/Preprocessor/predefined-macros-hlsl.hlsl b/clang/test/Preprocessor/predefined-macros-hlsl.hlsl index 251362cd03c0f8..cc5233fbcb2aca 100644 --- a/clang/test/Preprocessor/predefined-macros-hlsl.hlsl +++ b/clang/test/Preprocessor/predefined-macros-hlsl.hlsl @@ -1,14 +1,19 @@ -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-amplification | FileCheck -match-full-lines %s --check-prefixes=CHECK,AMPLIFICATION -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-compute | FileCheck -match-full-lines %s --check-prefixes=CHECK,COMPUTE -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-domain | FileCheck -match-full-lines %s --check-prefixes=CHECK,DOMAIN -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-geometry | FileCheck -match-full-lines %s --check-prefixes=CHECK,GEOMETRY -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-hull | FileCheck -match-full-lines %s --check-prefixes=CHECK,HULL -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-library | FileCheck -match-full-lines %s --check-prefixes=CHECK,LIBRARY -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-mesh | FileCheck -match-full-lines %s --check-prefixes=CHECK,MESH -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-pixel | FileCheck -match-full-lines %s --check-prefixes=CHECK,PIXEL -// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-vertex | FileCheck -match-full-lines %s --check-prefixes=CHECK,VERTEX +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-amplification | FileCheck 
-match-full-lines %s --check-prefixes=CHECK,AMPLIFICATION,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-compute | FileCheck -match-full-lines %s --check-prefixes=CHECK,COMPUTE,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-domain | FileCheck -match-full-lines %s --check-prefixes=CHECK,DOMAIN,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-geometry | FileCheck -match-full-lines %s --check-prefixes=CHECK,GEOMETRY,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-hull | FileCheck -match-full-lines %s --check-prefixes=CHECK,HULL,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-library | FileCheck -match-full-lines %s --check-prefixes=CHECK,LIBRARY,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-mesh | FileCheck -match-full-lines %s --check-prefixes=CHECK,MESH,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-pixel | FileCheck -match-full-lines %s --check-prefixes=CHECK,PIXEL,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.0-vertex | FileCheck -match-full-lines %s --check-prefixes=CHECK,VERTEX,NOHALF +// RUN: %clang_cc1 %s -E -dM -o - -triple dxil-pc-shadermodel6.3-vertex -fnative-half-type | FileCheck -match-full-lines %s --check-prefixes=CHECK,VERTEX,HALF + +// HALF: #define __HLSL_ENABLE_16_BIT 1 +// NOHALF-NOT: __HLSL_ENABLE_16_BIT // CHECK: #define __HLSL_VERSION 2021 + // CHECK: #define __SHADER_STAGE_AMPLIFICATION 14 // CHECK: #define __SHADER_STAGE_COMPUTE 5 // CHECK: #define __SHADER_STAGE_DOMAIN 4 diff --git a/clang/test/TestRunner.sh b/clang/test/TestRunner.sh deleted file mode 100755 index f96d3d552d2ee6..00000000000000 --- a/clang/test/TestRunner.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh -# -# TestRunner.sh - Backward compatible utility for testing an individual file. - -# Find where this script is. 
-Dir=$(dirname $(which $0)) -AbsDir=$(cd $Dir; pwd) - -# Find 'lit', assuming standard layout. -lit=$AbsDir/../../../utils/lit/lit.py - -# Dispatch to lit. -$lit "$@" diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 191e944ae91e03..ae5b697b8eb9e0 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -594,9 +594,7 @@ getInterfaceFile(const StringRef Filename) { std::unique_ptr IF; switch (identify_magic(Buffer->getBuffer())) { case file_magic::macho_dynamically_linked_shared_lib: - LLVM_FALLTHROUGH; case file_magic::macho_dynamically_linked_shared_lib_stub: - LLVM_FALLTHROUGH; case file_magic::macho_universal_binary: return DylibReader::get(Buffer->getMemBufferRef()); break; diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 74163f30e19b1d..398a11a5703558 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -5713,8 +5713,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("UnaryOperator"); case CXCursor_ArraySubscriptExpr: return cxstring::createRef("ArraySubscriptExpr"); - case CXCursor_OMPArraySectionExpr: - return cxstring::createRef("OMPArraySectionExpr"); + case CXCursor_ArraySectionExpr: + return cxstring::createRef("ArraySectionExpr"); case CXCursor_OMPArrayShapingExpr: return cxstring::createRef("OMPArrayShapingExpr"); case CXCursor_OMPIteratorExpr: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 454bf754986189..9325a16d2a8486 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -423,8 +423,8 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, K = CXCursor_UnexposedExpr; break; - case Stmt::OMPArraySectionExprClass: - K = CXCursor_OMPArraySectionExpr; + case Stmt::ArraySectionExprClass: + K = CXCursor_ArraySectionExpr; break; case 
Stmt::OMPArrayShapingExprClass: diff --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp index 3e00e82d6df966..49f76bac704d68 100644 --- a/clang/unittests/Format/SortIncludesTest.cpp +++ b/clang/unittests/Format/SortIncludesTest.cpp @@ -6,19 +6,19 @@ // //===----------------------------------------------------------------------===// -#include "FormatTestUtils.h" +#include "FormatTestBase.h" #include "clang/Format/Format.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "gtest/gtest.h" -#define DEBUG_TYPE "format-test" +#define DEBUG_TYPE "sort-includes-test" namespace clang { namespace format { namespace { -class SortIncludesTest : public ::testing::Test { +class SortIncludesTest : public test::FormatTestBase { protected: std::vector GetCodeRange(StringRef Code) { return std::vector(1, tooling::Range(0, Code.size())); @@ -819,6 +819,122 @@ TEST_F(SortIncludesTest, CalculatesCorrectCursorPositionWithRegrouping) { EXPECT_EQ(27u, newCursor(Code, 28)); // Start of last line } +TEST_F(SortIncludesTest, + CalculatesCorrectCursorPositionWhenNoReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = { + {"^\"a\"", 0, 0, false}, {"^\"b\"", 1, 1, false}, {".*", 2, 2, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + verifyNoChange(Code); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ(14u, newCursor(Code, 14)); + EXPECT_EQ(16u, newCursor(Code, 16)); + EXPECT_EQ(30u, newCursor(Code, 30)); + EXPECT_EQ(32u, newCursor(Code, 32)); + EXPECT_EQ(46u, newCursor(Code, 46)); + EXPECT_EQ(48u, newCursor(Code, 48)); +} + +TEST_F( + SortIncludesTest, + 
CalculatesCorrectCursorPositionWhenRemoveLinesReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = {{".*", 0, 0, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + std::string Expected = "#include \"a\"\r\n" // Start of line: 0 + "#include \"b\"\r\n" // Start of line: 14 + "#include \"c\"\r\n" // Start of line: 28 + "\r\n" // Start of line: 42 + "int i;"; // Start of line: 44 + EXPECT_EQ(Expected, sort(Code)); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ( + 14u, + newCursor(Code, 14)); // cursor on empty line in include block is ignored + EXPECT_EQ(14u, newCursor(Code, 16)); + EXPECT_EQ( + 30u, + newCursor(Code, 30)); // cursor on empty line in include block is ignored + EXPECT_EQ(28u, newCursor(Code, 32)); + EXPECT_EQ(42u, newCursor(Code, 46)); + EXPECT_EQ(44u, newCursor(Code, 48)); +} + +// FIXME: the tests below should pass. 
+#if 0 +TEST_F( + SortIncludesTest, + CalculatesCorrectCursorPositionWhenNewLineReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = { + {"^\"a\"", 0, 0, false}, {"^\"b\"", 1, 1, false}, {".*", 2, 2, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "#include \"b\"\r\n" // Start of line: 14 + "#include \"c\"\r\n" // Start of line: 28 + "\r\n" // Start of line: 42 + "int i;"; // Start of line: 44 + std::string Expected = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + EXPECT_EQ(Expected, sort(Code)); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ(15u, newCursor(Code, 16)); + EXPECT_EQ(30u, newCursor(Code, 32)); + EXPECT_EQ(44u, newCursor(Code, 46)); + EXPECT_EQ(46u, newCursor(Code, 48)); +} + +TEST_F( + SortIncludesTest, + CalculatesCorrectCursorPositionWhenNoNewLineReplacementsWithRegroupingAndCRLF) { + Style.IncludeBlocks = Style.IBS_Regroup; + FmtStyle.LineEnding = FormatStyle::LE_CRLF; + Style.IncludeCategories = { + {"^\"a\"", 0, 0, false}, {"^\"b\"", 1, 1, false}, {".*", 2, 2, false}}; + std::string Code = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"c\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"b\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + std::string Expected = "#include \"a\"\r\n" // Start of line: 0 + "\r\n" // Start of line: 14 + "#include \"b\"\r\n" // Start of line: 16 + "\r\n" // Start of line: 30 + "#include \"c\"\r\n" // Start of line: 32 + "\r\n" // Start of line: 46 + "int i;"; // Start of line: 48 + EXPECT_EQ(Expected, sort(Code)); + EXPECT_EQ(0u, newCursor(Code, 0)); + EXPECT_EQ(14u, newCursor(Code, 14)); 
+ EXPECT_EQ(30u, newCursor(Code, 32)); + EXPECT_EQ(30u, newCursor(Code, 30)); + EXPECT_EQ(15u, newCursor(Code, 15)); + EXPECT_EQ(44u, newCursor(Code, 46)); + EXPECT_EQ(46u, newCursor(Code, 48)); +} +#endif + TEST_F(SortIncludesTest, DeduplicateIncludes) { EXPECT_EQ("#include \n" "#include \n" diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp index b2a1069a9a61cc..31d91ef3c73928 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp @@ -28,7 +28,7 @@ void MarkupStackTracePrinter::RenderData(InternalScopedString *buffer, const char *format, const DataInfo *DI, const char *strip_path_prefix) { RenderContext(buffer); - buffer->AppendF(kFormatData, DI->start); + buffer->AppendF(kFormatData, reinterpret_cast(DI->start)); } bool MarkupStackTracePrinter::RenderNeedsSymbolization(const char *format) { @@ -43,12 +43,13 @@ void MarkupStackTracePrinter::RenderFrame(InternalScopedString *buffer, const char *strip_path_prefix) { CHECK(!RenderNeedsSymbolization(format)); RenderContext(buffer); - buffer->AppendF(kFormatFrame, frame_no, address); + buffer->AppendF(kFormatFrame, frame_no, reinterpret_cast(address)); } bool MarkupSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *stack) { char buffer[kFormatFunctionMax]; - internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr); + internal_snprintf(buffer, sizeof(buffer), kFormatFunction, + reinterpret_cast(addr)); stack->info.function = internal_strdup(buffer); return true; } @@ -118,7 +119,8 @@ static void RenderMmaps(InternalScopedString *buffer, // module.base_address == dlpi_addr // range.beg == dlpi_addr + p_vaddr // relative address == p_vaddr == range.beg - module.base_address - buffer->AppendF(kFormatMmap, range.beg, range.end - range.beg, moduleId, + buffer->AppendF(kFormatMmap, reinterpret_cast(range.beg), + range.end - 
range.beg, static_cast(moduleId), accessBuffer.data(), range.beg - module.base_address()); buffer->Append("\n"); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h index 83643504e1289e..a43661eaecf2ff 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h @@ -33,13 +33,13 @@ constexpr uptr kFormatFunctionMax = 64; // More than big enough for 64-bit hex. constexpr const char *kFormatData = "{{{data:%p}}}"; // One frame in a backtrace (printed on a line by itself). -constexpr const char *kFormatFrame = "{{{bt:%u:%p}}}"; +constexpr const char *kFormatFrame = "{{{bt:%d:%p}}}"; // Module contextual element. -constexpr const char *kFormatModule = "{{{module:%d:%s:elf:%s}}}"; +constexpr const char *kFormatModule = "{{{module:%zu:%s:elf:%s}}}"; // mmap for a module segment. -constexpr const char *kFormatMmap = "{{{mmap:%p:0x%x:load:%d:%s:0x%x}}}"; +constexpr const char *kFormatMmap = "{{{mmap:%p:0x%zx:load:%d:%s:0x%zx}}}"; // Dump trigger element. #define FORMAT_DUMPFILE "{{{dumpfile:%s:%s}}}" diff --git a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp index 5f3c8b81c07b31..fc793abf44cda5 100644 --- a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp @@ -108,9 +108,9 @@ bool MemMapFuchsia::mapImpl(UNUSED uptr Addr, uptr Size, const char *Name, // Create the VMO. 
zx_status_t Status = _zx_vmo_create(Size, 0, &Vmo); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmo_create", Size); - return false; + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmo_create", Size); } if (Name != nullptr) @@ -123,15 +123,15 @@ bool MemMapFuchsia::mapImpl(UNUSED uptr Addr, uptr Size, const char *Name, Status = _zx_vmar_map(_zx_vmar_root_self(), MapFlags, 0, Vmo, 0, Size, &MapAddr); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmar_map", Size); - - Status = _zx_handle_close(Vmo); - CHECK_EQ(Status, ZX_OK); + if (AllowNoMem && IsNoMemError(Status)) { + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); - MapAddr = 0; - Vmo = ZX_HANDLE_INVALID; - return false; + MapAddr = 0; + Vmo = ZX_HANDLE_INVALID; + return false; + } + dieOnError(Status, "zx_vmar_map", Size); } if (PreCommit) { @@ -194,9 +194,9 @@ bool MemMapFuchsia::remapImpl(uptr Addr, uptr Size, const char *Name, _zx_vmar_map(_zx_vmar_root_self(), MapFlags, Addr - getRootVmarBase(), Vmo, Addr - MapAddr, Size, &MappedAddr); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmar_map", Size); - return false; + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmar_map", Size); } DCHECK_EQ(Addr, MappedAddr); @@ -234,9 +234,9 @@ bool ReservedMemoryFuchsia::createImpl(UNUSED uptr Addr, uptr Size, zx_status_t Status = _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_ALLOW_FAULTS, 0, getPlaceholderVmo(), 0, Size, &Base); if (UNLIKELY(Status != ZX_OK)) { - if (!IsNoMemError(Status) || !AllowNoMem) - dieOnError(Status, "zx_vmar_map", Size); - return false; + if (AllowNoMem && IsNoMemError(Status)) + return false; + dieOnError(Status, "zx_vmar_map", Size); } Capacity = Size; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h 
b/compiler-rt/lib/scudo/standalone/primary64.h index 61d57976ae43b6..d6119051b1622f 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -884,9 +884,10 @@ template class SizeClassAllocator64 { ScopedLock ML(Region->MMLock); const bool RegionIsExhausted = Region->Exhausted; - if (!RegionIsExhausted) + if (!RegionIsExhausted) { PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, MaxBlockCount); + } ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; { @@ -1019,7 +1020,6 @@ template class SizeClassAllocator64 { MAP_ALLOWNOMEM))) { Printf("Can't reserve pages for size class %zu.\n", getSizeByClassId(ClassId)); - Region->Exhausted = true; return 0U; } initRegion(Region, ClassId, diff --git a/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in b/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in index 32a8c48e9c1c79..3fa9a7a2780e24 100644 --- a/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in +++ b/compiler-rt/test/ctx_profile/Unit/lit.site.cfg.py.in @@ -23,5 +23,6 @@ config.test_source_root = config.test_exec_root # host triple as the trailing path component. The value is incorrect for i386 # tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as # target, and we don't support different environments for building and, -# respectivelly, running tests, we shouldn't see this case. -assert not config.enable_per_target_runtime_dir or config.target_arch == config.host_arch +# respectively, running tests, we only need to fix up the x86_64 case.
+if config.enable_per_target_runtime_dir and config.target_arch != config.host_arch: + config.compiler_rt_libdir = re.sub(r'/i386(?=-[^/]+$)', '/x86_64', config.compiler_rt_libdir) diff --git a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in index c968e403a44d09..1e2442a1487a43 100644 --- a/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in +++ b/compiler-rt/test/memprof/Unit/lit.site.cfg.py.in @@ -21,10 +21,11 @@ config.test_source_root = config.test_exec_root # When LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=on, the initial value of # config.compiler_rt_libdir (COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR) has the # host triple as the trailing path component. The value is incorrect for i386 -# tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as +# tests on x86_64 hosts and vice versa. But, since only x86_64 is enabled as # target, and we don't support different environments for building and, -# respectivelly, running tests, we shouldn't see this case. -assert not config.enable_per_target_runtime_dir or config.target_arch == config.host_arch +# respectively, running tests, we only need to fix up the x86_64 case.
+if config.enable_per_target_runtime_dir and config.target_arch != config.host_arch: + config.compiler_rt_libdir = re.sub(r'/i386(?=-[^/]+$)', '/x86_64', config.compiler_rt_libdir) if not config.parallelism_group: - config.parallelism_group = 'shadow-memory' \ No newline at end of file + config.parallelism_group = 'shadow-memory' diff --git a/flang/include/flang/Common/visit.h b/flang/include/flang/Common/visit.h index 4d0897301e01db..d867338be7e0f5 100644 --- a/flang/include/flang/Common/visit.h +++ b/flang/include/flang/Common/visit.h @@ -40,11 +40,17 @@ inline RT_API_ATTRS RESULT Log2VisitHelper( return visitor(std::get<(LOW + N)>(std::forward(u))...); \ } VISIT_CASE_N(1) + [[fallthrough]]; VISIT_CASE_N(2) + [[fallthrough]]; VISIT_CASE_N(3) + [[fallthrough]]; VISIT_CASE_N(4) + [[fallthrough]]; VISIT_CASE_N(5) + [[fallthrough]]; VISIT_CASE_N(6) + [[fallthrough]]; VISIT_CASE_N(7) #undef VISIT_CASE_N } @@ -82,7 +88,7 @@ inline RT_API_ATTRS auto visit(VISITOR &&visitor, VARIANT &&...u) // Some versions of clang have bugs that cause compilation to hang // on these templates. MSVC and older GCC versions may work but are // not well tested. So enable only for GCC 9 and better. 
-#if __GNUC__ < 9 +#if __GNUC__ < 9 && !defined(__clang__) #define FLANG_USE_STD_VISIT #endif diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 87a714d17015cb..531616e7926aca 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -861,7 +861,6 @@ getOutputStream(CompilerInstance &ci, llvm::StringRef inFile, return ci.createDefaultOutputFile( /*Binary=*/false, inFile, /*extension=*/"ll"); case BackendActionTy::Backend_EmitFIR: - LLVM_FALLTHROUGH; case BackendActionTy::Backend_EmitHLFIR: return ci.createDefaultOutputFile( /*Binary=*/false, inFile, /*extension=*/"mlir"); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 8b62fe8c022f80..f66607dfa22f1b 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3794,7 +3794,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto needCleanup = fir::getIntIfConstant(cleanup); if (needCleanup && *needCleanup) temps.push_back(temp); - addSymbol(sym, temp, /*forced=*/true); + addSymbol(sym, + hlfir::translateToExtendedValue(loc, builder, temp).first, + /*forced=*/true); builder.create(loc, addr, temp, transferKindAttr); ++nbDeviceResidentObject; @@ -3810,12 +3812,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::Location loc = getCurrentLocation(); fir::FirOpBuilder &builder = getFirOpBuilder(); + bool isInDeviceContext = + builder.getRegion().getParentOfType(); bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) || Fortran::evaluate::HasCUDAAttrs(assign.rhs); bool hasCUDAImplicitTransfer = Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs); llvm::SmallVector implicitTemps; - if (hasCUDAImplicitTransfer) + if (hasCUDAImplicitTransfer && !isInDeviceContext) implicitTemps = genCUDAImplicitDataTransfer(builder, loc, assign); // Gather some information about the assignment that will impact how it is @@ -3874,13 +3878,13 @@ class 
FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::StatementContext localStmtCtx; hlfir::Entity rhs = evaluateRhs(localStmtCtx); hlfir::Entity lhs = evaluateLhs(localStmtCtx); - if (isCUDATransfer && !hasCUDAImplicitTransfer) + if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext) genCUDADataTransfer(builder, loc, assign, lhs, rhs); else builder.create(loc, rhs, lhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); - if (hasCUDAImplicitTransfer) { + if (hasCUDAImplicitTransfer && !isInDeviceContext) { localSymbols.popScope(); for (mlir::Value temp : implicitTemps) builder.create(loc, temp); diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 4ebd736315bcbc..70483685d20019 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -98,7 +98,7 @@ end ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) -! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %21#0 {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>) -> (!fir.heap>, !fir.heap>) +! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) ! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.heap> ! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> ! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref> @@ -119,3 +119,25 @@ end ! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! 
CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + + +! Check that fir.cuda_data_transfer are not generated within cuf kernel +subroutine sub4() + integer, parameter :: n = 10 + real, device :: adev(n) + real :: ahost(n) + real :: b + integer :: i + + adev = ahost + !$cuf kernel do <<<*,*>>> + do i = 1, n + adev(i) = adev(i) + b + enddo +end subroutine + +! CHECK-LABEL: func.func @_QPsub4() +! CHECK: fir.cuda_data_transfer +! CHECK: fir.cuda_kernel<<<*, *>>> +! CHECK-NOT: fir.cuda_data_transfer +! CHECK: hlfir.assign diff --git a/flang/test/Lower/OpenMP/FIR/if-clause.f90 b/flang/test/Lower/OpenMP/FIR/if-clause.f90 index f686b9708fc54a..683d9f7ef97267 100644 --- a/flang/test/Lower/OpenMP/FIR/if-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/if-clause.f90 @@ -1,7 +1,9 @@ ! This test checks lowering of OpenMP IF clauses. -! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s -! RUN: %flang_fc1 -fopenmp -emit-fir %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0, and +! to the "teams" directive in OpenMP 5.2. +! RUN: bbc -fopenmp -fopenmp-version=52 -emit-fir %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-fir %s -o - | FileCheck %s program main integer :: i diff --git a/flang/test/Lower/OpenMP/FIR/simd.f90 b/flang/test/Lower/OpenMP/FIR/simd.f90 index db7d30295c45d9..91e8750578bfb4 100644 --- a/flang/test/Lower/OpenMP/FIR/simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/simd.f90 @@ -1,6 +1,7 @@ ! Tests for 2.9.3.1 Simd -! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0. +! 
RUN: bbc -fopenmp -fopenmp-version=50 -emit-fir -hlfir=false %s -o - | FileCheck %s !CHECK-LABEL: func @_QPsimd() subroutine simd diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index a7344e02cf7cca..ea4b9240e9e514 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -1,4 +1,5 @@ -!RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp %s -o - | FileCheck %s +! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1. +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s !=============================================================================== ! Target_Enter Simple diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index ce4427a0c2cab2..7c15c275d8cc9d 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -1,7 +1,9 @@ ! This test checks lowering of OpenMP IF clauses. -! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0, and +! to the "teams" directive in OpenMP 5.2. +! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck %s program main integer :: i diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 190aa615212176..8ec1a3cefb4a60 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -1,7 +1,8 @@ ! Tests for 2.9.3.1 Simd -!RUN: %flang_fc1 -flang-experimental-hlfir -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -hlfir -emit-hlfir -fopenmp %s -o - | FileCheck %s +! The "if" clause was added to the "simd" directive in OpenMP 5.0. +! 
RUN: %flang_fc1 -flang-experimental-hlfir -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! RUN: bbc -hlfir -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s !CHECK-LABEL: func @_QPsimd() subroutine simd diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 0f0c736d316250..44f77b5c33607b 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -1,4 +1,5 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +! The "thread_limit" clause was added to the "target" construct in OpenMP 5.1. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s !=============================================================================== ! Target_Enter Simple diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 index d849dd206b9439..90eede4f84108f 100644 --- a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 +++ b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 @@ -1,5 +1,6 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s +! The "use_device_addr" was added to the "target data" directive in OpenMP 5.0. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s ! This tests primary goal is to check the promotion of ! 
non-CPTR arguments from use_device_ptr to diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index 8724f321a9d117..938ab76c6ecbdd 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -50,6 +50,7 @@ Implemented Papers - P1659R3 - ``std::ranges::starts_with`` and ``std::ranges::ends_with`` - P3029R1 - Better ``mdspan``'s CTAD - P2387R3 - Pipe support for user-defined range adaptors +- P2713R1 - Escaping improvements in ``std::format`` Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index f75dd288304b27..01387a404f5d67 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -108,7 +108,7 @@ "`P2164R9 `__","LWG", "``views::enumerate``","February 2023","","","|ranges|" "`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","|In Progress| [#note-P2711R1]_","","|ranges|" "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" -"`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" +"`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","|Complete|","19.0","|format|" "`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","|Complete|","17.0","|format|" "`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","|Complete|","17.0","|format|" "`P2693R1 `__","LWG", "Formatting ``thread::id`` and ``stacktrace``","February 2023","|Partial| [#note-P2693R1]_","","|format|" diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 008f7418ab9c05..666be319757c17 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -32,7 +32,7 @@ "`3951 `__","[expected.object.swap]: Using ``value()`` instead of 
``has_value()``","Kona November 2023","","","" "`3953 `__","``iter_move`` for ``common_iterator`` and ``counted_iterator`` should return ``decltype(auto)``","Kona November 2023","","","|ranges|" "`3957 `__","[container.alloc.reqmts] The value category of v should be claimed","Kona November 2023","","","" -"`3965 `__","Incorrect example in [format.string.escaped] p3 for formatting of combining characters","Kona November 2023","","","|format|" +"`3965 `__","Incorrect example in [format.string.escaped] p3 for formatting of combining characters","Kona November 2023","|Complete|","19.0","|format|" "`3970 `__","[mdspan.syn] Missing definition of ``full_extent_t`` and ``full_extent``","Kona November 2023","","","" "`3973 `__","Monadic operations should be ADL-proof","Kona November 2023","","","" "`3974 `__","``mdspan::operator[]`` should not copy ``OtherIndexTypes``","Kona November 2023","","","" @@ -49,7 +49,7 @@ "`4012 `__","``common_view::begin/end`` are missing the ``simple-view`` check","Tokyo March 2024","","","|ranges|" "`4013 `__","``lazy_split_view::outer-iterator::value_type`` should not provide default constructor","Tokyo March 2024","","","|ranges|" "`4016 `__","container-insertable checks do not match what container-inserter does","Tokyo March 2024","","","" -"`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","","","" +"`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","|Complete|","19.0","" "`4025 `__","Move assignment operator of ``std::expected`` should not be conditionally deleted","Tokyo March 2024","","","" "`4030 `__","Clarify whether arithmetic expressions in ``[numeric.sat.func]`` are mathematical or C++","Tokyo March 2024","|Nothing To Do|","","" "`4031 `__","``bad_expected_access`` member functions should be ``noexcept``","Tokyo March 2024","|Complete|","16.0","" diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv index 
7da77def92daa2..3780c1ed5c1279 100644 --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -10,7 +10,7 @@ Number,Name,Standard,Assignee,Status,First released version "`P2508R1 `__","Exposing ``std::basic-format-string``","C++23","Mark de Wever","|Complete|",15.0 "`P2585R0 `__","Improving default container formatting","C++23","Mark de Wever","|Complete|",17.0 "`P2539R4 `__","Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?","C++23","Mark de Wever","|Complete|","18.0" -"`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","" +"`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","|Complete|",19.0 "`P2675R1 `__","``format``'s width estimation is too approximate and not forward compatible","C++23","Mark de Wever","|Complete|",17.0 "`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|Complete|",17.0 "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``","C++23","Mark de Wever","|In Progress|" diff --git a/libcxx/include/__availability b/libcxx/include/__availability index aa761eb5bfe5e3..7a02ae00846bfa 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -28,30 +28,32 @@ // that previously released library. Normally, this would be a load-time error // when one tries to launch the program against the older library. // -// For example, the filesystem library was introduced in the dylib in macOS 10.15. -// If a user compiles on a macOS 10.15 host but targets macOS 10.13 with their -// program, the compiler would normally not complain (because the required -// declarations are in the headers), but the dynamic loader would fail to find -// the symbols when actually trying to launch the program on macOS 10.13. 
To -// turn this into a compile-time issue instead, declarations are annotated with -// when they were introduced, and the compiler can produce a diagnostic if the -// program references something that isn't available on the deployment target. +// For example, the filesystem library was introduced in the dylib in LLVM 9. +// On Apple platforms, this corresponds to macOS 10.15. If a user compiles on +// a macOS 10.15 host but targets macOS 10.13 with their program, the compiler +// would normally not complain (because the required declarations are in the +// headers), but the dynamic loader would fail to find the symbols when actually +// trying to launch the program on macOS 10.13. To turn this into a compile-time +// issue instead, declarations are annotated with when they were introduced, and +// the compiler can produce a diagnostic if the program references something that +// isn't available on the deployment target. // // This mechanism is general in nature, and any vendor can add their markup to // the library (see below). Whenever a new feature is added that requires support // in the shared library, two macros are added below to allow marking the feature // as unavailable: -// 1. A macro named `_LIBCPP_AVAILABILITY_HAS_NO_` which must be defined -// exactly when compiling for a target that doesn't support the feature. -// 2. A macro named `_LIBCPP_AVAILABILITY_`, which must always be defined -// and must expand to the proper availability attribute for the platform. +// 1. A macro named `_LIBCPP_AVAILABILITY_HAS_` which must be defined +// to `_LIBCPP_INTRODUCED_IN_` for the appropriate LLVM version. +// 2. A macro named `_LIBCPP_AVAILABILITY_`, which must be defined to +// `_LIBCPP_INTRODUCED_IN__MARKUP` for the appropriate LLVM version. // // When vendors decide to ship the feature as part of their shared library, they -// can update these macros appropriately for their platform, and the library will -// use those to provide an optimal user experience. 
+// can update the `_LIBCPP_INTRODUCED_IN_` macro (and the markup counterpart) +// based on the platform version they shipped that version of LLVM in. The library +// will then use this markup to provide an optimal user experience on these platforms. // // Furthermore, many features in the standard library have corresponding -// feature-test macros. The `_LIBCPP_AVAILABILITY_HAS_NO_` macros +// feature-test macros. The `_LIBCPP_AVAILABILITY_HAS_` macros // are checked by the corresponding feature-test macros generated by // generate_feature_test_macro_components.py to ensure that the library // doesn't announce a feature as being implemented if it is unavailable on @@ -74,237 +76,181 @@ // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. -// doesn't support the proper attributes. #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif #endif +// When availability annotations are disabled, we take for granted that features introduced +// in all versions of the library are available. #if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -// These macros control the availability of std::bad_optional_access and -// other exception types. These were put in the shared library to prevent -// code bloat from every user program defining the vtable for these exception -// types. -// -// Note that when exceptions are disabled, the methods that normally throw -// these exceptions can be used even on older deployment targets, but those -// methods will abort instead of throwing. 
-# define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS 1 -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS - -# define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS 1 -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCPP_INTRODUCED_IN_LLVM_4 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP /* nothing */ -# define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST 1 -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST +# define _LIBCPP_INTRODUCED_IN_LLVM_9 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP /* nothing */ -// These macros control the availability of all parts of that -// depend on something in the dylib. -# define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY 1 -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP +# define _LIBCPP_INTRODUCED_IN_LLVM_10 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP /* nothing */ -// This controls the availability of the C++20 synchronization library, -// which requires shared library support for various operations -// (see libcxx/src/atomic.cpp). This includes , , -// , and notification functions on std::atomic. -# define _LIBCPP_AVAILABILITY_HAS_SYNC 1 -# define _LIBCPP_AVAILABILITY_SYNC +# define _LIBCPP_INTRODUCED_IN_LLVM_12 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_12_MARKUP /* nothing */ -// Enable additional explicit instantiations of iostreams components. This -// reduces the number of weak definitions generated in programs that use -// iostreams by providing a single strong definition in the shared library. -// -// TODO: Enable additional explicit instantiations on GCC once it supports exclude_from_explicit_instantiation, -// or once libc++ doesn't use the attribute anymore. -// TODO: Enable them on Windows once https://llvm.org/PR41018 has been fixed. 
-# if !defined(_LIBCPP_COMPILER_GCC) && !defined(_WIN32) -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 1 -# else -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 -# endif +# define _LIBCPP_INTRODUCED_IN_LLVM_14 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP /* nothing */ -// This controls the availability of floating-point std::to_chars functions. -// These overloads were added later than the integer overloads. -# define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT 1 -# define _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT +# define _LIBCPP_INTRODUCED_IN_LLVM_15 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_15_MARKUP /* nothing */ -// This controls whether the library claims to provide a default verbose -// termination function, and consequently whether the headers will try -// to use it when the mechanism isn't overriden at compile-time. -# define _LIBCPP_AVAILABILITY_HAS_VERBOSE_ABORT 1 -# define _LIBCPP_AVAILABILITY_VERBOSE_ABORT +# define _LIBCPP_INTRODUCED_IN_LLVM_16 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP /* nothing */ -// This controls the availability of the C++17 std::pmr library, -// which is implemented in large part in the built library. -# define _LIBCPP_AVAILABILITY_HAS_PMR 1 -# define _LIBCPP_AVAILABILITY_PMR +# define _LIBCPP_INTRODUCED_IN_LLVM_18 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP /* nothing */ -// These macros controls the availability of __cxa_init_primary_exception -// in the built library, which std::make_exception_ptr might use -// (see libcxx/include/__exception/exception_ptr.h). -# define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION 1 -# define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION - -// This controls the availability of C++23 , which -// has a dependency on the built library (it needs access to -// the underlying buffer types of std::cout, std::cerr, and std::clog. 
-# define _LIBCPP_AVAILABILITY_HAS_PRINT 1 -# define _LIBCPP_AVAILABILITY_PRINT - -// This controls the availability of the C++20 time zone database. -// The parser code is built in the library. -# define _LIBCPP_AVAILABILITY_HAS_TZDB 1 -# define _LIBCPP_AVAILABILITY_TZDB - -// These macros determine whether we assume that std::bad_function_call and -// std::bad_expected_access provide a key function in the dylib. This allows -// centralizing their vtable and typeinfo instead of having all TUs provide -// a weak definition that then gets deduplicated. -# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 1 -# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION -# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 1 -# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION +# define _LIBCPP_INTRODUCED_IN_LLVM_19 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP /* nothing */ #elif defined(__APPLE__) -# define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS \ - (!defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) || __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ >= 50000) - -# define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS - -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS __attribute__((availability(watchos, strict, introduced = 5.0))) -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS - -// TODO: Update once this is released -# define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION 0 -# define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION __attribute__((unavailable)) +// LLVM 4 +# if defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 50000 +# define _LIBCPP_INTRODUCED_IN_LLVM_4 0 +# define 
_LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP __attribute__((availability(watchos, strict, introduced = 5.0))) +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_4 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP /* nothing */ +# endif -// +// LLVM 9 // clang-format off # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_9 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP \ + __attribute__((availability(macos, strict, introduced = 10.15))) \ + __attribute__((availability(ios, strict, introduced = 13.0))) \ + __attribute__((availability(tvos, strict, introduced = 13.0))) \ + __attribute__((availability(watchos, strict, introduced = 6.0))) +// clang-format off +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH \ + _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") +// clang-format on # else -# define 
_LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_9 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH /* nothing */ +# define _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY \ - __attribute__((availability(macos, strict, introduced = 10.15))) \ - __attribute__((availability(ios, strict, introduced = 13.0))) \ - __attribute__((availability(tvos, strict, introduced = 13.0))) \ - __attribute__((availability(watchos, strict, introduced = 6.0))) + +// LLVM 10 // clang-format off -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH \ - _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") -# define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) // clang-format on +# define _LIBCPP_INTRODUCED_IN_LLVM_10 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP \ + 
__attribute__((availability(macos, strict, introduced = 11.0))) \ + __attribute__((availability(ios, strict, introduced = 14.0))) \ + __attribute__((availability(tvos, strict, introduced = 14.0))) \ + __attribute__((availability(watchos, strict, introduced = 7.0))) +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_10 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP /* nothing */ +# endif -// std::to_chars(floating-point) +// LLVM 12 // clang-format off -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130300) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160300) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160300) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90300) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 120000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80000) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_12 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_12_MARKUP \ + __attribute__((availability(macos, strict, introduced = 12.0))) \ + __attribute__((availability(ios, strict, introduced = 15.0))) \ + __attribute__((availability(tvos, strict, introduced = 15.0))) \ + __attribute__((availability(watchos, strict, introduced = 8.0))) # else -# define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_12 1 +# define 
_LIBCPP_INTRODUCED_IN_LLVM_12_MARKUP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT \ - __attribute__((availability(macos, strict, introduced = 13.3))) \ - __attribute__((availability(ios, strict, introduced = 16.3))) \ - __attribute__((availability(tvos, strict, introduced = 16.3))) \ - __attribute__((availability(watchos, strict, introduced = 9.3))) -// c++20 synchronization library +// LLVM 14 // clang-format off -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130400) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160500) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160500) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90500) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_SYNC 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_14 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP \ + __attribute__((availability(macos, strict, introduced = 13.4))) \ + __attribute__((availability(ios, strict, introduced = 16.5))) \ + __attribute__((availability(tvos, strict, introduced = 16.5))) \ + __attribute__((availability(watchos, strict, introduced = 9.5))) # else -# define _LIBCPP_AVAILABILITY_HAS_SYNC 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_14 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP /* nothing */ # endif -# 
define _LIBCPP_AVAILABILITY_SYNC \ - __attribute__((availability(macos, strict, introduced = 11.0))) \ - __attribute__((availability(ios, strict, introduced = 14.0))) \ - __attribute__((availability(tvos, strict, introduced = 14.0))) \ - __attribute__((availability(watchos, strict, introduced = 7.0))) - -// __libcpp_verbose_abort -// TODO: Update once this is released -# define _LIBCPP_AVAILABILITY_HAS_VERBOSE_ABORT 0 -# define _LIBCPP_AVAILABILITY_VERBOSE_ABORT __attribute__((unavailable)) - -// std::pmr +// LLVM 15-16 +# define _LIBCPP_INTRODUCED_IN_LLVM_15 _LIBCPP_INTRODUCED_IN_LLVM_16 +# define _LIBCPP_INTRODUCED_IN_LLVM_15_MARKUP _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP // clang-format off # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 140000) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 170000) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 170000) || \ (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 100000) // clang-format on -# define _LIBCPP_AVAILABILITY_HAS_PMR 0 -# else -# define _LIBCPP_AVAILABILITY_HAS_PMR 1 -# endif -// TODO: Enable std::pmr markup once https://github.com/llvm/llvm-project/issues/40340 has been fixed -// Until then, it is possible for folks to try to use `std::pmr` when back-deploying to targets that don't support -// it and it'll be a load-time error, but we don't have a good alternative because the library won't compile if we -// use availability annotations until that bug has been fixed. 
-# if 0 -# define _LIBCPP_AVAILABILITY_PMR \ +# define _LIBCPP_INTRODUCED_IN_LLVM_16 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP \ __attribute__((availability(macos, strict, introduced = 14.0))) \ __attribute__((availability(ios, strict, introduced = 17.0))) \ __attribute__((availability(tvos, strict, introduced = 17.0))) \ __attribute__((availability(watchos, strict, introduced = 10.0))) # else -# define _LIBCPP_AVAILABILITY_PMR +# define _LIBCPP_INTRODUCED_IN_LLVM_16 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_16_MARKUP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_HAS_TZDB 0 -# define _LIBCPP_AVAILABILITY_TZDB __attribute__((unavailable)) - -// Warning: This availability macro works differently than the other macros. -// The dylib part of print is not needed on Apple platforms. Therefore when -// the macro is not available the code calling the dylib is commented out. -// The macro _LIBCPP_AVAILABILITY_PRINT is not used. -# define _LIBCPP_AVAILABILITY_HAS_PRINT 0 -# define _LIBCPP_AVAILABILITY_PRINT __attribute__((unavailable)) - -// clang-format off -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 120000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 150000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80000) -// clang-format on -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 +// LLVM 18 +// TODO: Fill this in +# if 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP __attribute__((unavailable)) # else -# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18 1 +# define 
_LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP /* nothing */ # endif -# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION 0 -# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION __attribute__((unavailable)) - -# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION 0 -# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION __attribute__((unavailable)) +// LLVM 19 +// TODO: Fill this in +# if 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_19 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP __attribute__((unavailable)) +# else +# define _LIBCPP_INTRODUCED_IN_LLVM_19 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP /* nothing */ +# endif #else @@ -315,6 +261,97 @@ #endif +// These macros control the availability of std::bad_optional_access and +// other exception types. These were put in the shared library to prevent +// code bloat from every user program defining the vtable for these exception +// types. +// +// Note that when exceptions are disabled, the methods that normally throw +// these exceptions can be used even on older deployment targets, but those +// methods will abort instead of throwing. +#define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4 +#define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP + +#define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4 +#define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP + +#define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST _LIBCPP_INTRODUCED_IN_LLVM_4 +#define _LIBCPP_AVAILABILITY_BAD_ANY_CAST _LIBCPP_INTRODUCED_IN_LLVM_4_MARKUP + +// These macros control the availability of all parts of that +// depend on something in the dylib. 
+#define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9 +#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP +#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_PUSH +#define _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP _LIBCPP_INTRODUCED_IN_LLVM_9_MARKUP_POP + +// This controls the availability of the C++20 synchronization library, +// which requires shared library support for various operations +// (see libcxx/src/atomic.cpp). This includes , , +// , and notification functions on std::atomic. +#define _LIBCPP_AVAILABILITY_HAS_SYNC _LIBCPP_INTRODUCED_IN_LLVM_10 +#define _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INTRODUCED_IN_LLVM_10_MARKUP + +// Enable additional explicit instantiations of iostreams components. This +// reduces the number of weak definitions generated in programs that use +// iostreams by providing a single strong definition in the shared library. +// +// TODO: Enable additional explicit instantiations on GCC once it supports exclude_from_explicit_instantiation, +// or once libc++ doesn't use the attribute anymore. +// TODO: Enable them on Windows once https://llvm.org/PR41018 has been fixed. +#if !defined(_LIBCPP_COMPILER_GCC) && !defined(_WIN32) +# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 _LIBCPP_INTRODUCED_IN_LLVM_12 +#else +# define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 +#endif + +// This controls the availability of floating-point std::to_chars functions. +// These overloads were added later than the integer overloads. 
+#define _LIBCPP_AVAILABILITY_HAS_TO_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_14 +#define _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_14_MARKUP + +// This controls whether the library claims to provide a default verbose +// termination function, and consequently whether the headers will try +// to use it when the mechanism isn't overriden at compile-time. +#define _LIBCPP_AVAILABILITY_HAS_VERBOSE_ABORT _LIBCPP_INTRODUCED_IN_LLVM_15 +#define _LIBCPP_AVAILABILITY_VERBOSE_ABORT _LIBCPP_INTRODUCED_IN_LLVM_15_MARKUP + +// This controls the availability of the C++17 std::pmr library, +// which is implemented in large part in the built library. +// +// TODO: Enable std::pmr markup once https://github.com/llvm/llvm-project/issues/40340 has been fixed +// Until then, it is possible for folks to try to use `std::pmr` when back-deploying to targets that don't support +// it and it'll be a load-time error, but we don't have a good alternative because the library won't compile if we +// use availability annotations until that bug has been fixed. +#define _LIBCPP_AVAILABILITY_HAS_PMR _LIBCPP_INTRODUCED_IN_LLVM_16 +#define _LIBCPP_AVAILABILITY_PMR + +// These macros controls the availability of __cxa_init_primary_exception +// in the built library, which std::make_exception_ptr might use +// (see libcxx/include/__exception/exception_ptr.h). +#define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION _LIBCPP_INTRODUCED_IN_LLVM_18 +#define _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP + +// This controls the availability of C++23 , which +// has a dependency on the built library (it needs access to +// the underlying buffer types of std::cout, std::cerr, and std::clog. +#define _LIBCPP_AVAILABILITY_HAS_PRINT _LIBCPP_INTRODUCED_IN_LLVM_18 +#define _LIBCPP_AVAILABILITY_PRINT _LIBCPP_INTRODUCED_IN_LLVM_18_MARKUP + +// This controls the availability of the C++20 time zone database. 
+// The parser code is built in the library. +#define _LIBCPP_AVAILABILITY_HAS_TZDB _LIBCPP_INTRODUCED_IN_LLVM_19 +#define _LIBCPP_AVAILABILITY_TZDB _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP + +// These macros determine whether we assume that std::bad_function_call and +// std::bad_expected_access provide a key function in the dylib. This allows +// centralizing their vtable and typeinfo instead of having all TUs provide +// a weak definition that then gets deduplicated. +# define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19 +# define _LIBCPP_AVAILABILITY_BAD_FUNCTION_CALL_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP +# define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19 +# define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_MARKUP + // Define availability attributes that depend on _LIBCPP_HAS_NO_EXCEPTIONS. // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. diff --git a/libcxx/include/__format/escaped_output_table.h b/libcxx/include/__format/escaped_output_table.h index b194f9431c3be3..a4c4c366cf2414 100644 --- a/libcxx/include/__format/escaped_output_table.h +++ b/libcxx/include/__format/escaped_output_table.h @@ -80,10 +80,9 @@ namespace __escaped_output_table { /// The entries of the characters to escape in format's debug string. 
/// /// Contains the entries for [format.string.escaped]/2.2.1.2.1 -/// CE is a Unicode encoding and C corresponds to either a UCS scalar value -/// whose Unicode property General_Category has a value in the groups -/// Separator (Z) or Other (C) or to a UCS scalar value which has the Unicode -/// property Grapheme_Extend=Yes, as described by table 12 of UAX #44 +/// CE is a Unicode encoding and C corresponds to a UCS scalar value whose +/// Unicode property General_Category has a value in the groups Separator (Z) +/// or Other (C), as described by table 12 of UAX #44 /// /// Separator (Z) consists of General_Category /// - Space_Separator, @@ -98,7 +97,6 @@ namespace __escaped_output_table { /// - Unassigned. /// /// The data is generated from -/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// - https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt /// /// The table is similar to the table @@ -110,908 +108,1091 @@ namespace __escaped_output_table { /// - bits [0, 10] The size of the range, allowing 2048 elements. /// - bits [11, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. 
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[894] = { - 0x00000020, - 0x0003f821, - 0x00056800, - 0x0018006f, - 0x001bc001, - 0x001c0003, - 0x001c5800, - 0x001c6800, - 0x001d1000, - 0x00241806, - 0x00298000, - 0x002ab801, - 0x002c5801, - 0x002c802d, - 0x002df800, - 0x002e0801, - 0x002e2001, - 0x002e3808, - 0x002f5803, - 0x002fa810, - 0x0030800a, - 0x0030e000, - 0x00325814, - 0x00338000, - 0x0036b007, - 0x0036f805, - 0x00373801, - 0x00375003, - 0x00387001, - 0x00388800, - 0x0039801c, - 0x003d300a, - 0x003d900d, - 0x003f5808, - 0x003fd802, - 0x0040b003, - 0x0040d808, - 0x00412802, - 0x00414806, - 0x0041f800, - 0x0042c804, - 0x0042f800, - 0x00435804, - 0x00447810, - 0x00465038, - 0x0049d000, - 0x0049e000, - 0x004a0807, - 0x004a6800, - 0x004a8806, - 0x004b1001, - 0x004c0800, - 0x004c2000, - 0x004c6801, - 0x004c8801, - 0x004d4800, - 0x004d8800, - 0x004d9802, - 0x004dd002, - 0x004df000, - 0x004e0805, - 0x004e4801, - 0x004e6800, - 0x004e780c, - 0x004ef000, - 0x004f1003, - 0x004ff004, - 0x00502000, - 0x00505803, - 0x00508801, - 0x00514800, - 0x00518800, - 0x0051a000, - 0x0051b800, - 0x0051d003, - 0x00520817, - 0x0052e800, - 0x0052f806, - 0x00538001, - 0x0053a800, - 0x0053b80b, - 0x00542000, - 0x00547000, - 0x00549000, - 0x00554800, - 0x00558800, - 0x0055a000, - 0x0055d002, - 0x00560807, - 0x00565000, - 0x00566802, - 0x0056880e, - 0x00571003, - 0x00579006, - 0x0057d007, - 0x00582000, - 0x00586801, - 0x00588801, - 0x00594800, - 0x00598800, - 0x0059a000, - 0x0059d002, - 0x0059f001, - 0x005a0805, - 0x005a4801, - 0x005a680e, - 0x005af000, - 0x005b1003, - 0x005bc00a, - 0x005c2000, - 0x005c5802, - 0x005c8800, - 0x005cb002, - 0x005cd800, - 0x005ce800, - 0x005d0002, - 0x005d2802, - 0x005d5802, - 0x005dd004, - 0x005e0000, - 0x005e1802, - 0x005e4800, - 0x005e6802, - 0x005e8814, - 0x005fd805, - 0x00602000, - 0x00606800, - 0x00608800, - 0x00614800, - 0x0061d002, - 0x0061f002, - 0x00622812, - 0x0062d801, - 0x0062f001, - 0x00631003, - 0x00638006, - 0x00640800, - 0x00646800, 
- 0x00648800, - 0x00654800, - 0x0065a000, - 0x0065d002, - 0x0065f800, - 0x00661000, - 0x00662801, - 0x00664800, - 0x00666010, - 0x0066f800, - 0x00671003, - 0x00678000, - 0x0067a00d, - 0x00686800, - 0x00688800, - 0x0069d801, - 0x0069f000, - 0x006a0804, - 0x006a4800, - 0x006a6800, - 0x006a8003, - 0x006ab800, - 0x006b1003, - 0x006c0001, - 0x006c2000, - 0x006cb802, - 0x006d9000, - 0x006de000, - 0x006df001, - 0x006e3808, - 0x006e9005, - 0x006ef806, - 0x006f8001, - 0x006fa80b, - 0x00718800, - 0x0071a00a, - 0x00723807, - 0x0072e024, - 0x00741800, - 0x00742800, - 0x00745800, - 0x00752000, - 0x00753000, - 0x00758800, - 0x0075a008, - 0x0075f001, - 0x00762800, - 0x00763808, - 0x0076d001, - 0x0077001f, - 0x0078c001, - 0x0079a800, - 0x0079b800, - 0x0079c800, - 0x007a4000, - 0x007b6811, - 0x007c0004, - 0x007c3001, - 0x007c6830, - 0x007e3000, - 0x007e6800, - 0x007ed824, - 0x00816803, - 0x00819005, - 0x0081c801, - 0x0081e801, - 0x0082c001, - 0x0082f002, - 0x00838803, - 0x00841000, - 0x00842801, - 0x00846800, - 0x0084e800, - 0x00863000, - 0x00864004, - 0x00867001, - 0x00924800, - 0x00927001, - 0x0092b800, - 0x0092c800, - 0x0092f001, - 0x00944800, - 0x00947001, - 0x00958800, - 0x0095b001, - 0x0095f800, - 0x00960800, - 0x00963001, - 0x0096b800, - 0x00988800, - 0x0098b001, - 0x009ad804, - 0x009be802, - 0x009cd005, - 0x009fb001, - 0x009ff001, - 0x00b40000, - 0x00b4e802, - 0x00b7c806, - 0x00b89002, - 0x00b8b008, - 0x00b99001, - 0x00b9b808, - 0x00ba900d, - 0x00bb6800, - 0x00bb880e, - 0x00bda001, - 0x00bdb806, - 0x00be3000, - 0x00be480a, - 0x00bee802, - 0x00bf5005, - 0x00bfd005, - 0x00c05804, - 0x00c0d005, - 0x00c3c806, - 0x00c42801, - 0x00c54800, - 0x00c55804, - 0x00c7b009, - 0x00c8f803, - 0x00c93801, - 0x00c96003, - 0x00c99000, - 0x00c9c806, - 0x00ca0802, - 0x00cb7001, - 0x00cba80a, - 0x00cd6003, - 0x00ce5005, - 0x00ced802, - 0x00d0b801, - 0x00d0d802, - 0x00d2b000, - 0x00d2c008, - 0x00d31000, - 0x00d32807, - 0x00d3980c, - 0x00d45005, - 0x00d4d005, - 0x00d57055, - 0x00d9a006, - 
0x00d9e000, - 0x00da1000, - 0x00da6802, - 0x00db5808, - 0x00dbf802, - 0x00dd1003, - 0x00dd4001, - 0x00dd5802, - 0x00df3000, - 0x00df4001, - 0x00df6800, - 0x00df7802, - 0x00dfa007, - 0x00e16007, - 0x00e1b004, - 0x00e25002, - 0x00e44806, - 0x00e5d801, - 0x00e6400a, - 0x00e6a00c, - 0x00e71006, - 0x00e76800, - 0x00e7a000, - 0x00e7c001, - 0x00e7d804, - 0x00ee003f, - 0x00f8b001, - 0x00f8f001, - 0x00fa3001, - 0x00fa7001, - 0x00fac000, - 0x00fad000, - 0x00fae000, - 0x00faf000, - 0x00fbf001, - 0x00fda800, - 0x00fe2800, - 0x00fea001, - 0x00fee000, - 0x00ff8001, - 0x00ffa800, - 0x00fff810, - 0x01014007, - 0x0102f810, - 0x01039001, - 0x01047800, - 0x0104e802, - 0x0106083e, - 0x010c6003, - 0x01213818, - 0x01225814, - 0x015ba001, - 0x015cb000, - 0x01677802, - 0x0167a004, - 0x01693000, - 0x01694004, - 0x01697001, - 0x016b4006, - 0x016b880e, - 0x016cb808, - 0x016d3800, - 0x016d7800, - 0x016db800, - 0x016df800, - 0x016e3800, - 0x016e7800, - 0x016eb800, - 0x016ef820, - 0x0172f021, - 0x0174d000, - 0x0177a00b, - 0x017eb019, - 0x01800000, - 0x01815005, - 0x01820000, - 0x0184b803, - 0x01880004, - 0x01898000, - 0x018c7800, - 0x018f200a, - 0x0190f800, - 0x05246802, - 0x05263808, - 0x05316013, - 0x05337803, - 0x0533a009, - 0x0534f001, - 0x05378001, - 0x0537c007, - 0x053e5804, - 0x053e9000, - 0x053ea000, - 0x053ed017, - 0x05401000, - 0x05403000, - 0x05405800, - 0x05412801, - 0x05416003, - 0x0541d005, - 0x0543c007, - 0x05462009, - 0x0546d017, - 0x0547f800, - 0x05493007, - 0x054a380a, - 0x054aa00a, - 0x054be805, - 0x054d9800, - 0x054db003, - 0x054de001, - 0x054e7000, - 0x054ed003, - 0x054f2800, - 0x054ff800, - 0x05514805, - 0x05518801, - 0x0551a80a, - 0x05521800, - 0x05526000, - 0x05527001, - 0x0552d001, - 0x0553e000, - 0x05558000, - 0x05559002, - 0x0555b801, - 0x0555f001, - 0x05560800, - 0x05561817, - 0x05576001, - 0x0557b00a, - 0x05583801, - 0x05587801, - 0x0558b808, - 0x05593800, - 0x05597800, - 0x055b6003, - 0x055f2800, - 0x055f4000, - 0x055f6802, - 0x055fd005, - 0x06bd200b, - 0x06be3803, 
- 0x06bfe7ff, - 0x06ffe7ff, - 0x073fe7ff, - 0x077fe7ff, - 0x07bfe103, - 0x07d37001, - 0x07d6d025, - 0x07d8380b, - 0x07d8c004, - 0x07d8f000, - 0x07d9b800, - 0x07d9e800, - 0x07d9f800, - 0x07da1000, - 0x07da2800, - 0x07de180f, - 0x07ec8001, - 0x07ee4006, - 0x07ee801f, - 0x07f0000f, - 0x07f0d015, - 0x07f29800, - 0x07f33800, - 0x07f36003, - 0x07f3a800, - 0x07f7e803, - 0x07fcf001, - 0x07fdf802, - 0x07fe4001, - 0x07fe8001, - 0x07fec001, - 0x07fee802, - 0x07ff3800, - 0x07ff780c, - 0x07fff001, - 0x08006000, - 0x08013800, - 0x0801d800, - 0x0801f000, - 0x08027001, - 0x0802f021, - 0x0807d804, - 0x08081803, - 0x0809a002, - 0x080c7800, - 0x080ce802, - 0x080d082e, - 0x080fe882, - 0x0814e802, - 0x0816880f, - 0x0817e003, - 0x08192008, - 0x081a5804, - 0x081bb009, - 0x081cf000, - 0x081e2003, - 0x081eb029, - 0x0824f001, - 0x08255005, - 0x0826a003, - 0x0827e003, - 0x08294007, - 0x082b200a, - 0x082bd800, - 0x082c5800, - 0x082c9800, - 0x082cb000, - 0x082d1000, - 0x082d9000, - 0x082dd000, - 0x082de842, - 0x0839b808, - 0x083ab009, - 0x083b4017, - 0x083c3000, - 0x083d8800, - 0x083dd844, - 0x08403001, - 0x08404800, - 0x0841b000, - 0x0841c802, - 0x0841e801, - 0x0842b000, - 0x0844f807, - 0x0845802f, - 0x08479800, - 0x0847b004, - 0x0848e002, - 0x0849d004, - 0x084a003f, - 0x084dc003, - 0x084e8001, - 0x0850080e, - 0x0850a000, - 0x0850c000, - 0x0851b009, - 0x08524806, - 0x0852c806, - 0x0855001f, - 0x08572805, - 0x0857b808, - 0x0859b002, - 0x085ab001, - 0x085b9804, - 0x085c9006, - 0x085ce80b, - 0x085d804f, - 0x08624836, - 0x0865980c, - 0x08679806, - 0x0869200b, - 0x0869d125, - 0x0873f800, - 0x08755002, - 0x08757001, - 0x0875904d, - 0x08794007, - 0x087a300a, - 0x087ad015, - 0x087c1003, - 0x087c5025, - 0x087e6013, - 0x087fb808, - 0x08800800, - 0x0881c00e, - 0x08827003, - 0x08838000, - 0x08839801, - 0x0883b00b, - 0x08859803, - 0x0885c801, - 0x0885e800, - 0x0886100d, - 0x08874806, - 0x0887d008, - 0x08893804, - 0x08896808, - 0x088a4007, - 0x088b9800, - 0x088bb80a, - 0x088db008, - 0x088e4803, - 
0x088e7800, - 0x088f0000, - 0x088fa80a, - 0x08909000, - 0x08917802, - 0x0891a000, - 0x0891b001, - 0x0891f000, - 0x0892083e, - 0x08943800, - 0x08944800, - 0x08947000, - 0x0894f000, - 0x08955005, - 0x0896f800, - 0x0897180c, - 0x0897d007, - 0x08982000, - 0x08986801, - 0x08988801, - 0x08994800, - 0x08998800, - 0x0899a000, - 0x0899d002, - 0x0899f000, - 0x089a0000, - 0x089a2801, - 0x089a4801, - 0x089a7001, - 0x089a880b, - 0x089b209b, - 0x08a1c007, - 0x08a21002, - 0x08a23000, - 0x08a2e000, - 0x08a2f000, - 0x08a3101d, - 0x08a58000, - 0x08a59805, - 0x08a5d000, - 0x08a5e800, - 0x08a5f801, - 0x08a61001, - 0x08a64007, - 0x08a6d0a5, - 0x08ad7800, - 0x08ad9005, - 0x08ade001, - 0x08adf801, - 0x08aee023, - 0x08b19807, - 0x08b1e800, - 0x08b1f801, - 0x08b2280a, - 0x08b2d005, - 0x08b36812, - 0x08b55800, - 0x08b56800, - 0x08b58005, - 0x08b5b800, - 0x08b5d005, - 0x08b65035, - 0x08b8d804, - 0x08b91003, - 0x08b93808, - 0x08ba38b8, - 0x08c17808, - 0x08c1c801, - 0x08c1e063, - 0x08c7980b, - 0x08c83801, - 0x08c85001, - 0x08c8a000, - 0x08c8b800, - 0x08c98000, - 0x08c9b000, - 0x08c9c803, - 0x08c9f000, - 0x08ca1800, - 0x08ca3808, - 0x08cad045, - 0x08cd4001, - 0x08cea007, - 0x08cf0000, - 0x08cf281a, - 0x08d00809, - 0x08d19805, - 0x08d1d803, - 0x08d23808, - 0x08d28805, - 0x08d2c802, - 0x08d4500c, - 0x08d4c001, - 0x08d5180c, - 0x08d7c806, - 0x08d850f5, - 0x08e04800, - 0x08e1800d, - 0x08e1f800, - 0x08e23009, - 0x08e36802, - 0x08e48018, - 0x08e55006, - 0x08e59001, - 0x08e5a84a, - 0x08e83800, - 0x08e85000, - 0x08e98814, - 0x08ea3808, - 0x08ead005, - 0x08eb3000, - 0x08eb4800, - 0x08ec7803, - 0x08eca800, - 0x08ecb800, - 0x08ecc806, - 0x08ed5135, - 0x08f79801, - 0x08f7c808, - 0x08f88800, - 0x08f9b007, - 0x08fa0000, - 0x08fa1000, - 0x08fad055, - 0x08fd880e, - 0x08ff900c, - 0x091cd065, - 0x09237800, - 0x0923a80a, - 0x092a27ff, - 0x096a224b, - 0x097f980c, - 0x09a18010, - 0x09a23fff, - 0x09e23fb8, - 0x0a323fff, - 0x0a723fff, - 0x0ab23fff, - 0x0af23fff, - 0x0b3239b8, - 0x0b51c806, - 0x0b52f800, - 0x0b535003, 
- 0x0b55f800, - 0x0b565005, - 0x0b577006, - 0x0b57b009, - 0x0b598006, - 0x0b5a3009, - 0x0b5ad000, - 0x0b5b1000, - 0x0b5bc004, - 0x0b5c82af, - 0x0b74d864, - 0x0b7a5804, - 0x0b7c400a, - 0x0b7d003f, - 0x0b7f200b, - 0x0b7f900d, - 0x0c3fc007, - 0x0c66b029, - 0x0c684fff, - 0x0ca84fff, - 0x0ce84fff, - 0x0d284fff, - 0x0d684ae6, - 0x0d7fa000, - 0x0d7fe000, - 0x0d7ff800, - 0x0d89180e, - 0x0d89981c, - 0x0d8a9801, - 0x0d8ab00d, - 0x0d8b4007, - 0x0d97e7ff, - 0x0dd7e103, - 0x0de35804, - 0x0de3e802, - 0x0de44806, - 0x0de4d001, - 0x0de4e801, - 0x0de507ff, - 0x0e2507ff, - 0x0e6502af, - 0x0e7e203b, - 0x0e87b009, - 0x0e893801, - 0x0e8b2800, - 0x0e8b3802, - 0x0e8b7014, - 0x0e8c2806, - 0x0e8d5003, - 0x0e8f5814, - 0x0e921002, - 0x0e923079, - 0x0e96a00b, - 0x0e97a00b, - 0x0e9ab808, - 0x0e9bc886, - 0x0ea2a800, - 0x0ea4e800, - 0x0ea50001, - 0x0ea51801, - 0x0ea53801, - 0x0ea56800, - 0x0ea5d000, - 0x0ea5e000, - 0x0ea62000, - 0x0ea83000, - 0x0ea85801, - 0x0ea8a800, - 0x0ea8e800, - 0x0ea9d000, - 0x0ea9f800, - 0x0eaa2800, - 0x0eaa3802, - 0x0eaa8800, - 0x0eb53001, - 0x0ebe6001, - 0x0ed00036, - 0x0ed1d831, - 0x0ed3a800, - 0x0ed42000, - 0x0ed46473, - 0x0ef8f805, - 0x0ef95904, - 0x0f037091, - 0x0f096809, - 0x0f09f001, - 0x0f0a5003, - 0x0f0a813f, - 0x0f157011, - 0x0f176003, - 0x0f17d004, - 0x0f1801cf, - 0x0f276003, - 0x0f27d2e5, - 0x0f3f3800, - 0x0f3f6000, - 0x0f3f7800, - 0x0f3ff800, - 0x0f462801, - 0x0f46802f, - 0x0f4a2006, - 0x0f4a6003, - 0x0f4ad003, - 0x0f4b0310, - 0x0f65a84b, - 0x0f69f0c1, - 0x0f702000, - 0x0f710000, - 0x0f711800, - 0x0f712801, - 0x0f714000, - 0x0f719800, - 0x0f71c000, - 0x0f71d000, - 0x0f71e005, - 0x0f721803, - 0x0f724000, - 0x0f725000, - 0x0f726000, - 0x0f728000, - 0x0f729800, - 0x0f72a801, - 0x0f72c000, - 0x0f72d000, - 0x0f72e000, - 0x0f72f000, - 0x0f730000, - 0x0f731800, - 0x0f732801, - 0x0f735800, - 0x0f739800, - 0x0f73c000, - 0x0f73e800, - 0x0f73f800, - 0x0f745000, - 0x0f74e004, - 0x0f752000, - 0x0f755000, - 0x0f75e033, - 0x0f77910d, - 0x0f816003, - 0x0f84a00b, - 
0x0f857801, - 0x0f860000, - 0x0f868000, - 0x0f87b009, - 0x0f8d7037, - 0x0f90180c, - 0x0f91e003, - 0x0f924806, - 0x0f92900d, - 0x0f933099, - 0x0fb6c003, - 0x0fb76802, - 0x0fb7e802, - 0x0fbbb803, - 0x0fbed005, - 0x0fbf6003, - 0x0fbf880e, - 0x0fc06003, - 0x0fc24007, - 0x0fc2d005, - 0x0fc44007, - 0x0fc57001, - 0x0fc5904d, - 0x0fd2a00b, - 0x0fd37001, - 0x0fd3e802, - 0x0fd44806, - 0x0fd5f000, - 0x0fd63007, - 0x0fd6e003, - 0x0fd74806, - 0x0fd7c806, - 0x0fdc9800, - 0x0fde5824, - 0x0fdfd405, - 0x1537001f, - 0x15b9d005, - 0x15c0f001, - 0x1675100d, - 0x175f080e, - 0x1772f7ff, - 0x17b2f1a1, - 0x17d0f5e1, - 0x189a5804}; +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1077] = { + 0x00000020 /* 00000000 - 00000020 [ 33] */, + 0x0003f821 /* 0000007f - 000000a0 [ 34] */, + 0x00056800 /* 000000ad - 000000ad [ 1] */, + 0x001bc001 /* 00000378 - 00000379 [ 2] */, + 0x001c0003 /* 00000380 - 00000383 [ 4] */, + 0x001c5800 /* 0000038b - 0000038b [ 1] */, + 0x001c6800 /* 0000038d - 0000038d [ 1] */, + 0x001d1000 /* 000003a2 - 000003a2 [ 1] */, + 0x00298000 /* 00000530 - 00000530 [ 1] */, + 0x002ab801 /* 00000557 - 00000558 [ 2] */, + 0x002c5801 /* 0000058b - 0000058c [ 2] */, + 0x002c8000 /* 00000590 - 00000590 [ 1] */, + 0x002e4007 /* 000005c8 - 000005cf [ 8] */, + 0x002f5803 /* 000005eb - 000005ee [ 4] */, + 0x002fa810 /* 000005f5 - 00000605 [ 17] */, + 0x0030e000 /* 0000061c - 0000061c [ 1] */, + 0x0036e800 /* 000006dd - 000006dd [ 1] */, + 0x00387001 /* 0000070e - 0000070f [ 2] */, + 0x003a5801 /* 0000074b - 0000074c [ 2] */, + 0x003d900d /* 000007b2 - 000007bf [ 14] */, + 0x003fd801 /* 000007fb - 000007fc [ 2] */, + 0x00417001 /* 0000082e - 0000082f [ 2] */, + 0x0041f800 /* 0000083f - 0000083f [ 1] */, + 0x0042e001 /* 0000085c - 0000085d [ 2] */, + 0x0042f800 /* 0000085f - 0000085f [ 1] */, + 0x00435804 /* 0000086b - 0000086f [ 5] */, + 0x00447808 /* 0000088f - 00000897 [ 9] */, + 0x00471000 /* 000008e2 - 000008e2 [ 1] */, + 0x004c2000 /* 00000984 - 00000984 [ 1] */, + 
0x004c6801 /* 0000098d - 0000098e [ 2] */, + 0x004c8801 /* 00000991 - 00000992 [ 2] */, + 0x004d4800 /* 000009a9 - 000009a9 [ 1] */, + 0x004d8800 /* 000009b1 - 000009b1 [ 1] */, + 0x004d9802 /* 000009b3 - 000009b5 [ 3] */, + 0x004dd001 /* 000009ba - 000009bb [ 2] */, + 0x004e2801 /* 000009c5 - 000009c6 [ 2] */, + 0x004e4801 /* 000009c9 - 000009ca [ 2] */, + 0x004e7807 /* 000009cf - 000009d6 [ 8] */, + 0x004ec003 /* 000009d8 - 000009db [ 4] */, + 0x004ef000 /* 000009de - 000009de [ 1] */, + 0x004f2001 /* 000009e4 - 000009e5 [ 2] */, + 0x004ff801 /* 000009ff - 00000a00 [ 2] */, + 0x00502000 /* 00000a04 - 00000a04 [ 1] */, + 0x00505803 /* 00000a0b - 00000a0e [ 4] */, + 0x00508801 /* 00000a11 - 00000a12 [ 2] */, + 0x00514800 /* 00000a29 - 00000a29 [ 1] */, + 0x00518800 /* 00000a31 - 00000a31 [ 1] */, + 0x0051a000 /* 00000a34 - 00000a34 [ 1] */, + 0x0051b800 /* 00000a37 - 00000a37 [ 1] */, + 0x0051d001 /* 00000a3a - 00000a3b [ 2] */, + 0x0051e800 /* 00000a3d - 00000a3d [ 1] */, + 0x00521803 /* 00000a43 - 00000a46 [ 4] */, + 0x00524801 /* 00000a49 - 00000a4a [ 2] */, + 0x00527002 /* 00000a4e - 00000a50 [ 3] */, + 0x00529006 /* 00000a52 - 00000a58 [ 7] */, + 0x0052e800 /* 00000a5d - 00000a5d [ 1] */, + 0x0052f806 /* 00000a5f - 00000a65 [ 7] */, + 0x0053b809 /* 00000a77 - 00000a80 [ 10] */, + 0x00542000 /* 00000a84 - 00000a84 [ 1] */, + 0x00547000 /* 00000a8e - 00000a8e [ 1] */, + 0x00549000 /* 00000a92 - 00000a92 [ 1] */, + 0x00554800 /* 00000aa9 - 00000aa9 [ 1] */, + 0x00558800 /* 00000ab1 - 00000ab1 [ 1] */, + 0x0055a000 /* 00000ab4 - 00000ab4 [ 1] */, + 0x0055d001 /* 00000aba - 00000abb [ 2] */, + 0x00563000 /* 00000ac6 - 00000ac6 [ 1] */, + 0x00565000 /* 00000aca - 00000aca [ 1] */, + 0x00567001 /* 00000ace - 00000acf [ 2] */, + 0x0056880e /* 00000ad1 - 00000adf [ 15] */, + 0x00572001 /* 00000ae4 - 00000ae5 [ 2] */, + 0x00579006 /* 00000af2 - 00000af8 [ 7] */, + 0x00580000 /* 00000b00 - 00000b00 [ 1] */, + 0x00582000 /* 00000b04 - 00000b04 [ 1] */, + 0x00586801 /* 
00000b0d - 00000b0e [ 2] */, + 0x00588801 /* 00000b11 - 00000b12 [ 2] */, + 0x00594800 /* 00000b29 - 00000b29 [ 1] */, + 0x00598800 /* 00000b31 - 00000b31 [ 1] */, + 0x0059a000 /* 00000b34 - 00000b34 [ 1] */, + 0x0059d001 /* 00000b3a - 00000b3b [ 2] */, + 0x005a2801 /* 00000b45 - 00000b46 [ 2] */, + 0x005a4801 /* 00000b49 - 00000b4a [ 2] */, + 0x005a7006 /* 00000b4e - 00000b54 [ 7] */, + 0x005ac003 /* 00000b58 - 00000b5b [ 4] */, + 0x005af000 /* 00000b5e - 00000b5e [ 1] */, + 0x005b2001 /* 00000b64 - 00000b65 [ 2] */, + 0x005bc009 /* 00000b78 - 00000b81 [ 10] */, + 0x005c2000 /* 00000b84 - 00000b84 [ 1] */, + 0x005c5802 /* 00000b8b - 00000b8d [ 3] */, + 0x005c8800 /* 00000b91 - 00000b91 [ 1] */, + 0x005cb002 /* 00000b96 - 00000b98 [ 3] */, + 0x005cd800 /* 00000b9b - 00000b9b [ 1] */, + 0x005ce800 /* 00000b9d - 00000b9d [ 1] */, + 0x005d0002 /* 00000ba0 - 00000ba2 [ 3] */, + 0x005d2802 /* 00000ba5 - 00000ba7 [ 3] */, + 0x005d5802 /* 00000bab - 00000bad [ 3] */, + 0x005dd003 /* 00000bba - 00000bbd [ 4] */, + 0x005e1802 /* 00000bc3 - 00000bc5 [ 3] */, + 0x005e4800 /* 00000bc9 - 00000bc9 [ 1] */, + 0x005e7001 /* 00000bce - 00000bcf [ 2] */, + 0x005e8805 /* 00000bd1 - 00000bd6 [ 6] */, + 0x005ec00d /* 00000bd8 - 00000be5 [ 14] */, + 0x005fd804 /* 00000bfb - 00000bff [ 5] */, + 0x00606800 /* 00000c0d - 00000c0d [ 1] */, + 0x00608800 /* 00000c11 - 00000c11 [ 1] */, + 0x00614800 /* 00000c29 - 00000c29 [ 1] */, + 0x0061d001 /* 00000c3a - 00000c3b [ 2] */, + 0x00622800 /* 00000c45 - 00000c45 [ 1] */, + 0x00624800 /* 00000c49 - 00000c49 [ 1] */, + 0x00627006 /* 00000c4e - 00000c54 [ 7] */, + 0x0062b800 /* 00000c57 - 00000c57 [ 1] */, + 0x0062d801 /* 00000c5b - 00000c5c [ 2] */, + 0x0062f001 /* 00000c5e - 00000c5f [ 2] */, + 0x00632001 /* 00000c64 - 00000c65 [ 2] */, + 0x00638006 /* 00000c70 - 00000c76 [ 7] */, + 0x00646800 /* 00000c8d - 00000c8d [ 1] */, + 0x00648800 /* 00000c91 - 00000c91 [ 1] */, + 0x00654800 /* 00000ca9 - 00000ca9 [ 1] */, + 0x0065a000 /* 00000cb4 - 
00000cb4 [ 1] */, + 0x0065d001 /* 00000cba - 00000cbb [ 2] */, + 0x00662800 /* 00000cc5 - 00000cc5 [ 1] */, + 0x00664800 /* 00000cc9 - 00000cc9 [ 1] */, + 0x00667006 /* 00000cce - 00000cd4 [ 7] */, + 0x0066b805 /* 00000cd7 - 00000cdc [ 6] */, + 0x0066f800 /* 00000cdf - 00000cdf [ 1] */, + 0x00672001 /* 00000ce4 - 00000ce5 [ 2] */, + 0x00678000 /* 00000cf0 - 00000cf0 [ 1] */, + 0x0067a00b /* 00000cf4 - 00000cff [ 12] */, + 0x00686800 /* 00000d0d - 00000d0d [ 1] */, + 0x00688800 /* 00000d11 - 00000d11 [ 1] */, + 0x006a2800 /* 00000d45 - 00000d45 [ 1] */, + 0x006a4800 /* 00000d49 - 00000d49 [ 1] */, + 0x006a8003 /* 00000d50 - 00000d53 [ 4] */, + 0x006b2001 /* 00000d64 - 00000d65 [ 2] */, + 0x006c0000 /* 00000d80 - 00000d80 [ 1] */, + 0x006c2000 /* 00000d84 - 00000d84 [ 1] */, + 0x006cb802 /* 00000d97 - 00000d99 [ 3] */, + 0x006d9000 /* 00000db2 - 00000db2 [ 1] */, + 0x006de000 /* 00000dbc - 00000dbc [ 1] */, + 0x006df001 /* 00000dbe - 00000dbf [ 2] */, + 0x006e3802 /* 00000dc7 - 00000dc9 [ 3] */, + 0x006e5803 /* 00000dcb - 00000dce [ 4] */, + 0x006ea800 /* 00000dd5 - 00000dd5 [ 1] */, + 0x006eb800 /* 00000dd7 - 00000dd7 [ 1] */, + 0x006f0005 /* 00000de0 - 00000de5 [ 6] */, + 0x006f8001 /* 00000df0 - 00000df1 [ 2] */, + 0x006fa80b /* 00000df5 - 00000e00 [ 12] */, + 0x0071d803 /* 00000e3b - 00000e3e [ 4] */, + 0x0072e024 /* 00000e5c - 00000e80 [ 37] */, + 0x00741800 /* 00000e83 - 00000e83 [ 1] */, + 0x00742800 /* 00000e85 - 00000e85 [ 1] */, + 0x00745800 /* 00000e8b - 00000e8b [ 1] */, + 0x00752000 /* 00000ea4 - 00000ea4 [ 1] */, + 0x00753000 /* 00000ea6 - 00000ea6 [ 1] */, + 0x0075f001 /* 00000ebe - 00000ebf [ 2] */, + 0x00762800 /* 00000ec5 - 00000ec5 [ 1] */, + 0x00763800 /* 00000ec7 - 00000ec7 [ 1] */, + 0x00767800 /* 00000ecf - 00000ecf [ 1] */, + 0x0076d001 /* 00000eda - 00000edb [ 2] */, + 0x0077001f /* 00000ee0 - 00000eff [ 32] */, + 0x007a4000 /* 00000f48 - 00000f48 [ 1] */, + 0x007b6803 /* 00000f6d - 00000f70 [ 4] */, + 0x007cc000 /* 00000f98 - 00000f98 [ 1] 
*/, + 0x007de800 /* 00000fbd - 00000fbd [ 1] */, + 0x007e6800 /* 00000fcd - 00000fcd [ 1] */, + 0x007ed824 /* 00000fdb - 00000fff [ 37] */, + 0x00863000 /* 000010c6 - 000010c6 [ 1] */, + 0x00864004 /* 000010c8 - 000010cc [ 5] */, + 0x00867001 /* 000010ce - 000010cf [ 2] */, + 0x00924800 /* 00001249 - 00001249 [ 1] */, + 0x00927001 /* 0000124e - 0000124f [ 2] */, + 0x0092b800 /* 00001257 - 00001257 [ 1] */, + 0x0092c800 /* 00001259 - 00001259 [ 1] */, + 0x0092f001 /* 0000125e - 0000125f [ 2] */, + 0x00944800 /* 00001289 - 00001289 [ 1] */, + 0x00947001 /* 0000128e - 0000128f [ 2] */, + 0x00958800 /* 000012b1 - 000012b1 [ 1] */, + 0x0095b001 /* 000012b6 - 000012b7 [ 2] */, + 0x0095f800 /* 000012bf - 000012bf [ 1] */, + 0x00960800 /* 000012c1 - 000012c1 [ 1] */, + 0x00963001 /* 000012c6 - 000012c7 [ 2] */, + 0x0096b800 /* 000012d7 - 000012d7 [ 1] */, + 0x00988800 /* 00001311 - 00001311 [ 1] */, + 0x0098b001 /* 00001316 - 00001317 [ 2] */, + 0x009ad801 /* 0000135b - 0000135c [ 2] */, + 0x009be802 /* 0000137d - 0000137f [ 3] */, + 0x009cd005 /* 0000139a - 0000139f [ 6] */, + 0x009fb001 /* 000013f6 - 000013f7 [ 2] */, + 0x009ff001 /* 000013fe - 000013ff [ 2] */, + 0x00b40000 /* 00001680 - 00001680 [ 1] */, + 0x00b4e802 /* 0000169d - 0000169f [ 3] */, + 0x00b7c806 /* 000016f9 - 000016ff [ 7] */, + 0x00b8b008 /* 00001716 - 0000171e [ 9] */, + 0x00b9b808 /* 00001737 - 0000173f [ 9] */, + 0x00baa00b /* 00001754 - 0000175f [ 12] */, + 0x00bb6800 /* 0000176d - 0000176d [ 1] */, + 0x00bb8800 /* 00001771 - 00001771 [ 1] */, + 0x00bba00b /* 00001774 - 0000177f [ 12] */, + 0x00bef001 /* 000017de - 000017df [ 2] */, + 0x00bf5005 /* 000017ea - 000017ef [ 6] */, + 0x00bfd005 /* 000017fa - 000017ff [ 6] */, + 0x00c07000 /* 0000180e - 0000180e [ 1] */, + 0x00c0d005 /* 0000181a - 0000181f [ 6] */, + 0x00c3c806 /* 00001879 - 0000187f [ 7] */, + 0x00c55804 /* 000018ab - 000018af [ 5] */, + 0x00c7b009 /* 000018f6 - 000018ff [ 10] */, + 0x00c8f800 /* 0000191f - 0000191f [ 1] */, + 
0x00c96003 /* 0000192c - 0000192f [ 4] */, + 0x00c9e003 /* 0000193c - 0000193f [ 4] */, + 0x00ca0802 /* 00001941 - 00001943 [ 3] */, + 0x00cb7001 /* 0000196e - 0000196f [ 2] */, + 0x00cba80a /* 00001975 - 0000197f [ 11] */, + 0x00cd6003 /* 000019ac - 000019af [ 4] */, + 0x00ce5005 /* 000019ca - 000019cf [ 6] */, + 0x00ced802 /* 000019db - 000019dd [ 3] */, + 0x00d0e001 /* 00001a1c - 00001a1d [ 2] */, + 0x00d2f800 /* 00001a5f - 00001a5f [ 1] */, + 0x00d3e801 /* 00001a7d - 00001a7e [ 2] */, + 0x00d45005 /* 00001a8a - 00001a8f [ 6] */, + 0x00d4d005 /* 00001a9a - 00001a9f [ 6] */, + 0x00d57001 /* 00001aae - 00001aaf [ 2] */, + 0x00d67830 /* 00001acf - 00001aff [ 49] */, + 0x00da6802 /* 00001b4d - 00001b4f [ 3] */, + 0x00dbf800 /* 00001b7f - 00001b7f [ 1] */, + 0x00dfa007 /* 00001bf4 - 00001bfb [ 8] */, + 0x00e1c002 /* 00001c38 - 00001c3a [ 3] */, + 0x00e25002 /* 00001c4a - 00001c4c [ 3] */, + 0x00e44806 /* 00001c89 - 00001c8f [ 7] */, + 0x00e5d801 /* 00001cbb - 00001cbc [ 2] */, + 0x00e64007 /* 00001cc8 - 00001ccf [ 8] */, + 0x00e7d804 /* 00001cfb - 00001cff [ 5] */, + 0x00f8b001 /* 00001f16 - 00001f17 [ 2] */, + 0x00f8f001 /* 00001f1e - 00001f1f [ 2] */, + 0x00fa3001 /* 00001f46 - 00001f47 [ 2] */, + 0x00fa7001 /* 00001f4e - 00001f4f [ 2] */, + 0x00fac000 /* 00001f58 - 00001f58 [ 1] */, + 0x00fad000 /* 00001f5a - 00001f5a [ 1] */, + 0x00fae000 /* 00001f5c - 00001f5c [ 1] */, + 0x00faf000 /* 00001f5e - 00001f5e [ 1] */, + 0x00fbf001 /* 00001f7e - 00001f7f [ 2] */, + 0x00fda800 /* 00001fb5 - 00001fb5 [ 1] */, + 0x00fe2800 /* 00001fc5 - 00001fc5 [ 1] */, + 0x00fea001 /* 00001fd4 - 00001fd5 [ 2] */, + 0x00fee000 /* 00001fdc - 00001fdc [ 1] */, + 0x00ff8001 /* 00001ff0 - 00001ff1 [ 2] */, + 0x00ffa800 /* 00001ff5 - 00001ff5 [ 1] */, + 0x00fff810 /* 00001fff - 0000200f [ 17] */, + 0x01014007 /* 00002028 - 0000202f [ 8] */, + 0x0102f810 /* 0000205f - 0000206f [ 17] */, + 0x01039001 /* 00002072 - 00002073 [ 2] */, + 0x01047800 /* 0000208f - 0000208f [ 1] */, + 0x0104e802 /* 
0000209d - 0000209f [ 3] */, + 0x0106080e /* 000020c1 - 000020cf [ 15] */, + 0x0107880e /* 000020f1 - 000020ff [ 15] */, + 0x010c6003 /* 0000218c - 0000218f [ 4] */, + 0x01213818 /* 00002427 - 0000243f [ 25] */, + 0x01225814 /* 0000244b - 0000245f [ 21] */, + 0x015ba001 /* 00002b74 - 00002b75 [ 2] */, + 0x015cb000 /* 00002b96 - 00002b96 [ 1] */, + 0x0167a004 /* 00002cf4 - 00002cf8 [ 5] */, + 0x01693000 /* 00002d26 - 00002d26 [ 1] */, + 0x01694004 /* 00002d28 - 00002d2c [ 5] */, + 0x01697001 /* 00002d2e - 00002d2f [ 2] */, + 0x016b4006 /* 00002d68 - 00002d6e [ 7] */, + 0x016b880d /* 00002d71 - 00002d7e [ 14] */, + 0x016cb808 /* 00002d97 - 00002d9f [ 9] */, + 0x016d3800 /* 00002da7 - 00002da7 [ 1] */, + 0x016d7800 /* 00002daf - 00002daf [ 1] */, + 0x016db800 /* 00002db7 - 00002db7 [ 1] */, + 0x016df800 /* 00002dbf - 00002dbf [ 1] */, + 0x016e3800 /* 00002dc7 - 00002dc7 [ 1] */, + 0x016e7800 /* 00002dcf - 00002dcf [ 1] */, + 0x016eb800 /* 00002dd7 - 00002dd7 [ 1] */, + 0x016ef800 /* 00002ddf - 00002ddf [ 1] */, + 0x0172f021 /* 00002e5e - 00002e7f [ 34] */, + 0x0174d000 /* 00002e9a - 00002e9a [ 1] */, + 0x0177a00b /* 00002ef4 - 00002eff [ 12] */, + 0x017eb019 /* 00002fd6 - 00002fef [ 26] */, + 0x01800000 /* 00003000 - 00003000 [ 1] */, + 0x01820000 /* 00003040 - 00003040 [ 1] */, + 0x0184b801 /* 00003097 - 00003098 [ 2] */, + 0x01880004 /* 00003100 - 00003104 [ 5] */, + 0x01898000 /* 00003130 - 00003130 [ 1] */, + 0x018c7800 /* 0000318f - 0000318f [ 1] */, + 0x018f200a /* 000031e4 - 000031ee [ 11] */, + 0x0190f800 /* 0000321f - 0000321f [ 1] */, + 0x05246802 /* 0000a48d - 0000a48f [ 3] */, + 0x05263808 /* 0000a4c7 - 0000a4cf [ 9] */, + 0x05316013 /* 0000a62c - 0000a63f [ 20] */, + 0x0537c007 /* 0000a6f8 - 0000a6ff [ 8] */, + 0x053e5804 /* 0000a7cb - 0000a7cf [ 5] */, + 0x053e9000 /* 0000a7d2 - 0000a7d2 [ 1] */, + 0x053ea000 /* 0000a7d4 - 0000a7d4 [ 1] */, + 0x053ed017 /* 0000a7da - 0000a7f1 [ 24] */, + 0x05416802 /* 0000a82d - 0000a82f [ 3] */, + 0x0541d005 /* 0000a83a 
- 0000a83f [ 6] */, + 0x0543c007 /* 0000a878 - 0000a87f [ 8] */, + 0x05463007 /* 0000a8c6 - 0000a8cd [ 8] */, + 0x0546d005 /* 0000a8da - 0000a8df [ 6] */, + 0x054aa00a /* 0000a954 - 0000a95e [ 11] */, + 0x054be802 /* 0000a97d - 0000a97f [ 3] */, + 0x054e7000 /* 0000a9ce - 0000a9ce [ 1] */, + 0x054ed003 /* 0000a9da - 0000a9dd [ 4] */, + 0x054ff800 /* 0000a9ff - 0000a9ff [ 1] */, + 0x0551b808 /* 0000aa37 - 0000aa3f [ 9] */, + 0x05527001 /* 0000aa4e - 0000aa4f [ 2] */, + 0x0552d001 /* 0000aa5a - 0000aa5b [ 2] */, + 0x05561817 /* 0000aac3 - 0000aada [ 24] */, + 0x0557b809 /* 0000aaf7 - 0000ab00 [ 10] */, + 0x05583801 /* 0000ab07 - 0000ab08 [ 2] */, + 0x05587801 /* 0000ab0f - 0000ab10 [ 2] */, + 0x0558b808 /* 0000ab17 - 0000ab1f [ 9] */, + 0x05593800 /* 0000ab27 - 0000ab27 [ 1] */, + 0x05597800 /* 0000ab2f - 0000ab2f [ 1] */, + 0x055b6003 /* 0000ab6c - 0000ab6f [ 4] */, + 0x055f7001 /* 0000abee - 0000abef [ 2] */, + 0x055fd005 /* 0000abfa - 0000abff [ 6] */, + 0x06bd200b /* 0000d7a4 - 0000d7af [ 12] */, + 0x06be3803 /* 0000d7c7 - 0000d7ca [ 4] */, + 0x06bfe7ff /* 0000d7fc - 0000dffb [ 2048] */, + 0x06ffe7ff /* 0000dffc - 0000e7fb [ 2048] */, + 0x073fe7ff /* 0000e7fc - 0000effb [ 2048] */, + 0x077fe7ff /* 0000effc - 0000f7fb [ 2048] */, + 0x07bfe103 /* 0000f7fc - 0000f8ff [ 260] */, + 0x07d37001 /* 0000fa6e - 0000fa6f [ 2] */, + 0x07d6d025 /* 0000fada - 0000faff [ 38] */, + 0x07d8380b /* 0000fb07 - 0000fb12 [ 12] */, + 0x07d8c004 /* 0000fb18 - 0000fb1c [ 5] */, + 0x07d9b800 /* 0000fb37 - 0000fb37 [ 1] */, + 0x07d9e800 /* 0000fb3d - 0000fb3d [ 1] */, + 0x07d9f800 /* 0000fb3f - 0000fb3f [ 1] */, + 0x07da1000 /* 0000fb42 - 0000fb42 [ 1] */, + 0x07da2800 /* 0000fb45 - 0000fb45 [ 1] */, + 0x07de180f /* 0000fbc3 - 0000fbd2 [ 16] */, + 0x07ec8001 /* 0000fd90 - 0000fd91 [ 2] */, + 0x07ee4006 /* 0000fdc8 - 0000fdce [ 7] */, + 0x07ee801f /* 0000fdd0 - 0000fdef [ 32] */, + 0x07f0d005 /* 0000fe1a - 0000fe1f [ 6] */, + 0x07f29800 /* 0000fe53 - 0000fe53 [ 1] */, + 0x07f33800 /* 
0000fe67 - 0000fe67 [ 1] */, + 0x07f36003 /* 0000fe6c - 0000fe6f [ 4] */, + 0x07f3a800 /* 0000fe75 - 0000fe75 [ 1] */, + 0x07f7e803 /* 0000fefd - 0000ff00 [ 4] */, + 0x07fdf802 /* 0000ffbf - 0000ffc1 [ 3] */, + 0x07fe4001 /* 0000ffc8 - 0000ffc9 [ 2] */, + 0x07fe8001 /* 0000ffd0 - 0000ffd1 [ 2] */, + 0x07fec001 /* 0000ffd8 - 0000ffd9 [ 2] */, + 0x07fee802 /* 0000ffdd - 0000ffdf [ 3] */, + 0x07ff3800 /* 0000ffe7 - 0000ffe7 [ 1] */, + 0x07ff780c /* 0000ffef - 0000fffb [ 13] */, + 0x07fff001 /* 0000fffe - 0000ffff [ 2] */, + 0x08006000 /* 0001000c - 0001000c [ 1] */, + 0x08013800 /* 00010027 - 00010027 [ 1] */, + 0x0801d800 /* 0001003b - 0001003b [ 1] */, + 0x0801f000 /* 0001003e - 0001003e [ 1] */, + 0x08027001 /* 0001004e - 0001004f [ 2] */, + 0x0802f021 /* 0001005e - 0001007f [ 34] */, + 0x0807d804 /* 000100fb - 000100ff [ 5] */, + 0x08081803 /* 00010103 - 00010106 [ 4] */, + 0x0809a002 /* 00010134 - 00010136 [ 3] */, + 0x080c7800 /* 0001018f - 0001018f [ 1] */, + 0x080ce802 /* 0001019d - 0001019f [ 3] */, + 0x080d082e /* 000101a1 - 000101cf [ 47] */, + 0x080ff081 /* 000101fe - 0001027f [ 130] */, + 0x0814e802 /* 0001029d - 0001029f [ 3] */, + 0x0816880e /* 000102d1 - 000102df [ 15] */, + 0x0817e003 /* 000102fc - 000102ff [ 4] */, + 0x08192008 /* 00010324 - 0001032c [ 9] */, + 0x081a5804 /* 0001034b - 0001034f [ 5] */, + 0x081bd804 /* 0001037b - 0001037f [ 5] */, + 0x081cf000 /* 0001039e - 0001039e [ 1] */, + 0x081e2003 /* 000103c4 - 000103c7 [ 4] */, + 0x081eb029 /* 000103d6 - 000103ff [ 42] */, + 0x0824f001 /* 0001049e - 0001049f [ 2] */, + 0x08255005 /* 000104aa - 000104af [ 6] */, + 0x0826a003 /* 000104d4 - 000104d7 [ 4] */, + 0x0827e003 /* 000104fc - 000104ff [ 4] */, + 0x08294007 /* 00010528 - 0001052f [ 8] */, + 0x082b200a /* 00010564 - 0001056e [ 11] */, + 0x082bd800 /* 0001057b - 0001057b [ 1] */, + 0x082c5800 /* 0001058b - 0001058b [ 1] */, + 0x082c9800 /* 00010593 - 00010593 [ 1] */, + 0x082cb000 /* 00010596 - 00010596 [ 1] */, + 0x082d1000 /* 000105a2 - 
000105a2 [ 1] */, + 0x082d9000 /* 000105b2 - 000105b2 [ 1] */, + 0x082dd000 /* 000105ba - 000105ba [ 1] */, + 0x082de842 /* 000105bd - 000105ff [ 67] */, + 0x0839b808 /* 00010737 - 0001073f [ 9] */, + 0x083ab009 /* 00010756 - 0001075f [ 10] */, + 0x083b4017 /* 00010768 - 0001077f [ 24] */, + 0x083c3000 /* 00010786 - 00010786 [ 1] */, + 0x083d8800 /* 000107b1 - 000107b1 [ 1] */, + 0x083dd844 /* 000107bb - 000107ff [ 69] */, + 0x08403001 /* 00010806 - 00010807 [ 2] */, + 0x08404800 /* 00010809 - 00010809 [ 1] */, + 0x0841b000 /* 00010836 - 00010836 [ 1] */, + 0x0841c802 /* 00010839 - 0001083b [ 3] */, + 0x0841e801 /* 0001083d - 0001083e [ 2] */, + 0x0842b000 /* 00010856 - 00010856 [ 1] */, + 0x0844f807 /* 0001089f - 000108a6 [ 8] */, + 0x0845802f /* 000108b0 - 000108df [ 48] */, + 0x08479800 /* 000108f3 - 000108f3 [ 1] */, + 0x0847b004 /* 000108f6 - 000108fa [ 5] */, + 0x0848e002 /* 0001091c - 0001091e [ 3] */, + 0x0849d004 /* 0001093a - 0001093e [ 5] */, + 0x084a003f /* 00010940 - 0001097f [ 64] */, + 0x084dc003 /* 000109b8 - 000109bb [ 4] */, + 0x084e8001 /* 000109d0 - 000109d1 [ 2] */, + 0x08502000 /* 00010a04 - 00010a04 [ 1] */, + 0x08503804 /* 00010a07 - 00010a0b [ 5] */, + 0x0850a000 /* 00010a14 - 00010a14 [ 1] */, + 0x0850c000 /* 00010a18 - 00010a18 [ 1] */, + 0x0851b001 /* 00010a36 - 00010a37 [ 2] */, + 0x0851d803 /* 00010a3b - 00010a3e [ 4] */, + 0x08524806 /* 00010a49 - 00010a4f [ 7] */, + 0x0852c806 /* 00010a59 - 00010a5f [ 7] */, + 0x0855001f /* 00010aa0 - 00010abf [ 32] */, + 0x08573803 /* 00010ae7 - 00010aea [ 4] */, + 0x0857b808 /* 00010af7 - 00010aff [ 9] */, + 0x0859b002 /* 00010b36 - 00010b38 [ 3] */, + 0x085ab001 /* 00010b56 - 00010b57 [ 2] */, + 0x085b9804 /* 00010b73 - 00010b77 [ 5] */, + 0x085c9006 /* 00010b92 - 00010b98 [ 7] */, + 0x085ce80b /* 00010b9d - 00010ba8 [ 12] */, + 0x085d804f /* 00010bb0 - 00010bff [ 80] */, + 0x08624836 /* 00010c49 - 00010c7f [ 55] */, + 0x0865980c /* 00010cb3 - 00010cbf [ 13] */, + 0x08679806 /* 00010cf3 - 00010cf9 
[ 7] */, + 0x08694007 /* 00010d28 - 00010d2f [ 8] */, + 0x0869d125 /* 00010d3a - 00010e5f [ 294] */, + 0x0873f800 /* 00010e7f - 00010e7f [ 1] */, + 0x08755000 /* 00010eaa - 00010eaa [ 1] */, + 0x08757001 /* 00010eae - 00010eaf [ 2] */, + 0x0875904a /* 00010eb2 - 00010efc [ 75] */, + 0x08794007 /* 00010f28 - 00010f2f [ 8] */, + 0x087ad015 /* 00010f5a - 00010f6f [ 22] */, + 0x087c5025 /* 00010f8a - 00010faf [ 38] */, + 0x087e6013 /* 00010fcc - 00010fdf [ 20] */, + 0x087fb808 /* 00010ff7 - 00010fff [ 9] */, + 0x08827003 /* 0001104e - 00011051 [ 4] */, + 0x0883b008 /* 00011076 - 0001107e [ 9] */, + 0x0885e800 /* 000110bd - 000110bd [ 1] */, + 0x0886180c /* 000110c3 - 000110cf [ 13] */, + 0x08874806 /* 000110e9 - 000110ef [ 7] */, + 0x0887d005 /* 000110fa - 000110ff [ 6] */, + 0x0889a800 /* 00011135 - 00011135 [ 1] */, + 0x088a4007 /* 00011148 - 0001114f [ 8] */, + 0x088bb808 /* 00011177 - 0001117f [ 9] */, + 0x088f0000 /* 000111e0 - 000111e0 [ 1] */, + 0x088fa80a /* 000111f5 - 000111ff [ 11] */, + 0x08909000 /* 00011212 - 00011212 [ 1] */, + 0x0892103d /* 00011242 - 0001127f [ 62] */, + 0x08943800 /* 00011287 - 00011287 [ 1] */, + 0x08944800 /* 00011289 - 00011289 [ 1] */, + 0x08947000 /* 0001128e - 0001128e [ 1] */, + 0x0894f000 /* 0001129e - 0001129e [ 1] */, + 0x08955005 /* 000112aa - 000112af [ 6] */, + 0x08975804 /* 000112eb - 000112ef [ 5] */, + 0x0897d005 /* 000112fa - 000112ff [ 6] */, + 0x08982000 /* 00011304 - 00011304 [ 1] */, + 0x08986801 /* 0001130d - 0001130e [ 2] */, + 0x08988801 /* 00011311 - 00011312 [ 2] */, + 0x08994800 /* 00011329 - 00011329 [ 1] */, + 0x08998800 /* 00011331 - 00011331 [ 1] */, + 0x0899a000 /* 00011334 - 00011334 [ 1] */, + 0x0899d000 /* 0001133a - 0001133a [ 1] */, + 0x089a2801 /* 00011345 - 00011346 [ 2] */, + 0x089a4801 /* 00011349 - 0001134a [ 2] */, + 0x089a7001 /* 0001134e - 0001134f [ 2] */, + 0x089a8805 /* 00011351 - 00011356 [ 6] */, + 0x089ac004 /* 00011358 - 0001135c [ 5] */, + 0x089b2001 /* 00011364 - 00011365 [ 2] */, + 
0x089b6802 /* 0001136d - 0001136f [ 3] */, + 0x089ba88a /* 00011375 - 000113ff [ 139] */, + 0x08a2e000 /* 0001145c - 0001145c [ 1] */, + 0x08a3101d /* 00011462 - 0001147f [ 30] */, + 0x08a64007 /* 000114c8 - 000114cf [ 8] */, + 0x08a6d0a5 /* 000114da - 0001157f [ 166] */, + 0x08adb001 /* 000115b6 - 000115b7 [ 2] */, + 0x08aef021 /* 000115de - 000115ff [ 34] */, + 0x08b2280a /* 00011645 - 0001164f [ 11] */, + 0x08b2d005 /* 0001165a - 0001165f [ 6] */, + 0x08b36812 /* 0001166d - 0001167f [ 19] */, + 0x08b5d005 /* 000116ba - 000116bf [ 6] */, + 0x08b65035 /* 000116ca - 000116ff [ 54] */, + 0x08b8d801 /* 0001171b - 0001171c [ 2] */, + 0x08b96003 /* 0001172c - 0001172f [ 4] */, + 0x08ba38b8 /* 00011747 - 000117ff [ 185] */, + 0x08c1e063 /* 0001183c - 0001189f [ 100] */, + 0x08c7980b /* 000118f3 - 000118fe [ 12] */, + 0x08c83801 /* 00011907 - 00011908 [ 2] */, + 0x08c85001 /* 0001190a - 0001190b [ 2] */, + 0x08c8a000 /* 00011914 - 00011914 [ 1] */, + 0x08c8b800 /* 00011917 - 00011917 [ 1] */, + 0x08c9b000 /* 00011936 - 00011936 [ 1] */, + 0x08c9c801 /* 00011939 - 0001193a [ 2] */, + 0x08ca3808 /* 00011947 - 0001194f [ 9] */, + 0x08cad045 /* 0001195a - 0001199f [ 70] */, + 0x08cd4001 /* 000119a8 - 000119a9 [ 2] */, + 0x08cec001 /* 000119d8 - 000119d9 [ 2] */, + 0x08cf281a /* 000119e5 - 000119ff [ 27] */, + 0x08d24007 /* 00011a48 - 00011a4f [ 8] */, + 0x08d5180c /* 00011aa3 - 00011aaf [ 13] */, + 0x08d7c806 /* 00011af9 - 00011aff [ 7] */, + 0x08d850f5 /* 00011b0a - 00011bff [ 246] */, + 0x08e04800 /* 00011c09 - 00011c09 [ 1] */, + 0x08e1b800 /* 00011c37 - 00011c37 [ 1] */, + 0x08e23009 /* 00011c46 - 00011c4f [ 10] */, + 0x08e36802 /* 00011c6d - 00011c6f [ 3] */, + 0x08e48001 /* 00011c90 - 00011c91 [ 2] */, + 0x08e54000 /* 00011ca8 - 00011ca8 [ 1] */, + 0x08e5b848 /* 00011cb7 - 00011cff [ 73] */, + 0x08e83800 /* 00011d07 - 00011d07 [ 1] */, + 0x08e85000 /* 00011d0a - 00011d0a [ 1] */, + 0x08e9b802 /* 00011d37 - 00011d39 [ 3] */, + 0x08e9d800 /* 00011d3b - 00011d3b [ 1] */, 
+ 0x08e9f000 /* 00011d3e - 00011d3e [ 1] */, + 0x08ea4007 /* 00011d48 - 00011d4f [ 8] */, + 0x08ead005 /* 00011d5a - 00011d5f [ 6] */, + 0x08eb3000 /* 00011d66 - 00011d66 [ 1] */, + 0x08eb4800 /* 00011d69 - 00011d69 [ 1] */, + 0x08ec7800 /* 00011d8f - 00011d8f [ 1] */, + 0x08ec9000 /* 00011d92 - 00011d92 [ 1] */, + 0x08ecc806 /* 00011d99 - 00011d9f [ 7] */, + 0x08ed5135 /* 00011daa - 00011edf [ 310] */, + 0x08f7c806 /* 00011ef9 - 00011eff [ 7] */, + 0x08f88800 /* 00011f11 - 00011f11 [ 1] */, + 0x08f9d802 /* 00011f3b - 00011f3d [ 3] */, + 0x08fad055 /* 00011f5a - 00011faf [ 86] */, + 0x08fd880e /* 00011fb1 - 00011fbf [ 15] */, + 0x08ff900c /* 00011ff2 - 00011ffe [ 13] */, + 0x091cd065 /* 0001239a - 000123ff [ 102] */, + 0x09237800 /* 0001246f - 0001246f [ 1] */, + 0x0923a80a /* 00012475 - 0001247f [ 11] */, + 0x092a27ff /* 00012544 - 00012d43 [ 2048] */, + 0x096a224b /* 00012d44 - 00012f8f [ 588] */, + 0x097f980c /* 00012ff3 - 00012fff [ 13] */, + 0x09a1800f /* 00013430 - 0001343f [ 16] */, + 0x09a2b7ff /* 00013456 - 00013c55 [ 2048] */, + 0x09e2b7a9 /* 00013c56 - 000143ff [ 1962] */, + 0x0a323fff /* 00014647 - 00014e46 [ 2048] */, + 0x0a723fff /* 00014e47 - 00015646 [ 2048] */, + 0x0ab23fff /* 00015647 - 00015e46 [ 2048] */, + 0x0af23fff /* 00015e47 - 00016646 [ 2048] */, + 0x0b3239b8 /* 00016647 - 000167ff [ 441] */, + 0x0b51c806 /* 00016a39 - 00016a3f [ 7] */, + 0x0b52f800 /* 00016a5f - 00016a5f [ 1] */, + 0x0b535003 /* 00016a6a - 00016a6d [ 4] */, + 0x0b55f800 /* 00016abf - 00016abf [ 1] */, + 0x0b565005 /* 00016aca - 00016acf [ 6] */, + 0x0b577001 /* 00016aee - 00016aef [ 2] */, + 0x0b57b009 /* 00016af6 - 00016aff [ 10] */, + 0x0b5a3009 /* 00016b46 - 00016b4f [ 10] */, + 0x0b5ad000 /* 00016b5a - 00016b5a [ 1] */, + 0x0b5b1000 /* 00016b62 - 00016b62 [ 1] */, + 0x0b5bc004 /* 00016b78 - 00016b7c [ 5] */, + 0x0b5c82af /* 00016b90 - 00016e3f [ 688] */, + 0x0b74d864 /* 00016e9b - 00016eff [ 101] */, + 0x0b7a5803 /* 00016f4b - 00016f4e [ 4] */, + 0x0b7c4006 /* 
00016f88 - 00016f8e [ 7] */, + 0x0b7d003f /* 00016fa0 - 00016fdf [ 64] */, + 0x0b7f280a /* 00016fe5 - 00016fef [ 11] */, + 0x0b7f900d /* 00016ff2 - 00016fff [ 14] */, + 0x0c3fc007 /* 000187f8 - 000187ff [ 8] */, + 0x0c66b029 /* 00018cd6 - 00018cff [ 42] */, + 0x0c684fff /* 00018d09 - 00019508 [ 2048] */, + 0x0ca84fff /* 00019509 - 00019d08 [ 2048] */, + 0x0ce84fff /* 00019d09 - 0001a508 [ 2048] */, + 0x0d284fff /* 0001a509 - 0001ad08 [ 2048] */, + 0x0d684ae6 /* 0001ad09 - 0001afef [ 743] */, + 0x0d7fa000 /* 0001aff4 - 0001aff4 [ 1] */, + 0x0d7fe000 /* 0001affc - 0001affc [ 1] */, + 0x0d7ff800 /* 0001afff - 0001afff [ 1] */, + 0x0d89180e /* 0001b123 - 0001b131 [ 15] */, + 0x0d89981c /* 0001b133 - 0001b14f [ 29] */, + 0x0d8a9801 /* 0001b153 - 0001b154 [ 2] */, + 0x0d8ab00d /* 0001b156 - 0001b163 [ 14] */, + 0x0d8b4007 /* 0001b168 - 0001b16f [ 8] */, + 0x0d97e7ff /* 0001b2fc - 0001bafb [ 2048] */, + 0x0dd7e103 /* 0001bafc - 0001bbff [ 260] */, + 0x0de35804 /* 0001bc6b - 0001bc6f [ 5] */, + 0x0de3e802 /* 0001bc7d - 0001bc7f [ 3] */, + 0x0de44806 /* 0001bc89 - 0001bc8f [ 7] */, + 0x0de4d001 /* 0001bc9a - 0001bc9b [ 2] */, + 0x0de507ff /* 0001bca0 - 0001c49f [ 2048] */, + 0x0e2507ff /* 0001c4a0 - 0001cc9f [ 2048] */, + 0x0e65025f /* 0001cca0 - 0001ceff [ 608] */, + 0x0e797001 /* 0001cf2e - 0001cf2f [ 2] */, + 0x0e7a3808 /* 0001cf47 - 0001cf4f [ 9] */, + 0x0e7e203b /* 0001cfc4 - 0001cfff [ 60] */, + 0x0e87b009 /* 0001d0f6 - 0001d0ff [ 10] */, + 0x0e893801 /* 0001d127 - 0001d128 [ 2] */, + 0x0e8b9807 /* 0001d173 - 0001d17a [ 8] */, + 0x0e8f5814 /* 0001d1eb - 0001d1ff [ 21] */, + 0x0e923079 /* 0001d246 - 0001d2bf [ 122] */, + 0x0e96a00b /* 0001d2d4 - 0001d2df [ 12] */, + 0x0e97a00b /* 0001d2f4 - 0001d2ff [ 12] */, + 0x0e9ab808 /* 0001d357 - 0001d35f [ 9] */, + 0x0e9bc886 /* 0001d379 - 0001d3ff [ 135] */, + 0x0ea2a800 /* 0001d455 - 0001d455 [ 1] */, + 0x0ea4e800 /* 0001d49d - 0001d49d [ 1] */, + 0x0ea50001 /* 0001d4a0 - 0001d4a1 [ 2] */, + 0x0ea51801 /* 0001d4a3 - 0001d4a4 [ 
2] */, + 0x0ea53801 /* 0001d4a7 - 0001d4a8 [ 2] */, + 0x0ea56800 /* 0001d4ad - 0001d4ad [ 1] */, + 0x0ea5d000 /* 0001d4ba - 0001d4ba [ 1] */, + 0x0ea5e000 /* 0001d4bc - 0001d4bc [ 1] */, + 0x0ea62000 /* 0001d4c4 - 0001d4c4 [ 1] */, + 0x0ea83000 /* 0001d506 - 0001d506 [ 1] */, + 0x0ea85801 /* 0001d50b - 0001d50c [ 2] */, + 0x0ea8a800 /* 0001d515 - 0001d515 [ 1] */, + 0x0ea8e800 /* 0001d51d - 0001d51d [ 1] */, + 0x0ea9d000 /* 0001d53a - 0001d53a [ 1] */, + 0x0ea9f800 /* 0001d53f - 0001d53f [ 1] */, + 0x0eaa2800 /* 0001d545 - 0001d545 [ 1] */, + 0x0eaa3802 /* 0001d547 - 0001d549 [ 3] */, + 0x0eaa8800 /* 0001d551 - 0001d551 [ 1] */, + 0x0eb53001 /* 0001d6a6 - 0001d6a7 [ 2] */, + 0x0ebe6001 /* 0001d7cc - 0001d7cd [ 2] */, + 0x0ed4600e /* 0001da8c - 0001da9a [ 15] */, + 0x0ed50000 /* 0001daa0 - 0001daa0 [ 1] */, + 0x0ed5844f /* 0001dab0 - 0001deff [ 1104] */, + 0x0ef8f805 /* 0001df1f - 0001df24 [ 6] */, + 0x0ef958d4 /* 0001df2b - 0001dfff [ 213] */, + 0x0f003800 /* 0001e007 - 0001e007 [ 1] */, + 0x0f00c801 /* 0001e019 - 0001e01a [ 2] */, + 0x0f011000 /* 0001e022 - 0001e022 [ 1] */, + 0x0f012800 /* 0001e025 - 0001e025 [ 1] */, + 0x0f015804 /* 0001e02b - 0001e02f [ 5] */, + 0x0f037020 /* 0001e06e - 0001e08e [ 33] */, + 0x0f04806f /* 0001e090 - 0001e0ff [ 112] */, + 0x0f096802 /* 0001e12d - 0001e12f [ 3] */, + 0x0f09f001 /* 0001e13e - 0001e13f [ 2] */, + 0x0f0a5003 /* 0001e14a - 0001e14d [ 4] */, + 0x0f0a813f /* 0001e150 - 0001e28f [ 320] */, + 0x0f157810 /* 0001e2af - 0001e2bf [ 17] */, + 0x0f17d004 /* 0001e2fa - 0001e2fe [ 5] */, + 0x0f1801cf /* 0001e300 - 0001e4cf [ 464] */, + 0x0f27d2e5 /* 0001e4fa - 0001e7df [ 742] */, + 0x0f3f3800 /* 0001e7e7 - 0001e7e7 [ 1] */, + 0x0f3f6000 /* 0001e7ec - 0001e7ec [ 1] */, + 0x0f3f7800 /* 0001e7ef - 0001e7ef [ 1] */, + 0x0f3ff800 /* 0001e7ff - 0001e7ff [ 1] */, + 0x0f462801 /* 0001e8c5 - 0001e8c6 [ 2] */, + 0x0f46b828 /* 0001e8d7 - 0001e8ff [ 41] */, + 0x0f4a6003 /* 0001e94c - 0001e94f [ 4] */, + 0x0f4ad003 /* 0001e95a - 0001e95d [ 4] 
*/, + 0x0f4b0310 /* 0001e960 - 0001ec70 [ 785] */, + 0x0f65a84b /* 0001ecb5 - 0001ed00 [ 76] */, + 0x0f69f0c1 /* 0001ed3e - 0001edff [ 194] */, + 0x0f702000 /* 0001ee04 - 0001ee04 [ 1] */, + 0x0f710000 /* 0001ee20 - 0001ee20 [ 1] */, + 0x0f711800 /* 0001ee23 - 0001ee23 [ 1] */, + 0x0f712801 /* 0001ee25 - 0001ee26 [ 2] */, + 0x0f714000 /* 0001ee28 - 0001ee28 [ 1] */, + 0x0f719800 /* 0001ee33 - 0001ee33 [ 1] */, + 0x0f71c000 /* 0001ee38 - 0001ee38 [ 1] */, + 0x0f71d000 /* 0001ee3a - 0001ee3a [ 1] */, + 0x0f71e005 /* 0001ee3c - 0001ee41 [ 6] */, + 0x0f721803 /* 0001ee43 - 0001ee46 [ 4] */, + 0x0f724000 /* 0001ee48 - 0001ee48 [ 1] */, + 0x0f725000 /* 0001ee4a - 0001ee4a [ 1] */, + 0x0f726000 /* 0001ee4c - 0001ee4c [ 1] */, + 0x0f728000 /* 0001ee50 - 0001ee50 [ 1] */, + 0x0f729800 /* 0001ee53 - 0001ee53 [ 1] */, + 0x0f72a801 /* 0001ee55 - 0001ee56 [ 2] */, + 0x0f72c000 /* 0001ee58 - 0001ee58 [ 1] */, + 0x0f72d000 /* 0001ee5a - 0001ee5a [ 1] */, + 0x0f72e000 /* 0001ee5c - 0001ee5c [ 1] */, + 0x0f72f000 /* 0001ee5e - 0001ee5e [ 1] */, + 0x0f730000 /* 0001ee60 - 0001ee60 [ 1] */, + 0x0f731800 /* 0001ee63 - 0001ee63 [ 1] */, + 0x0f732801 /* 0001ee65 - 0001ee66 [ 2] */, + 0x0f735800 /* 0001ee6b - 0001ee6b [ 1] */, + 0x0f739800 /* 0001ee73 - 0001ee73 [ 1] */, + 0x0f73c000 /* 0001ee78 - 0001ee78 [ 1] */, + 0x0f73e800 /* 0001ee7d - 0001ee7d [ 1] */, + 0x0f73f800 /* 0001ee7f - 0001ee7f [ 1] */, + 0x0f745000 /* 0001ee8a - 0001ee8a [ 1] */, + 0x0f74e004 /* 0001ee9c - 0001eea0 [ 5] */, + 0x0f752000 /* 0001eea4 - 0001eea4 [ 1] */, + 0x0f755000 /* 0001eeaa - 0001eeaa [ 1] */, + 0x0f75e033 /* 0001eebc - 0001eeef [ 52] */, + 0x0f77910d /* 0001eef2 - 0001efff [ 270] */, + 0x0f816003 /* 0001f02c - 0001f02f [ 4] */, + 0x0f84a00b /* 0001f094 - 0001f09f [ 12] */, + 0x0f857801 /* 0001f0af - 0001f0b0 [ 2] */, + 0x0f860000 /* 0001f0c0 - 0001f0c0 [ 1] */, + 0x0f868000 /* 0001f0d0 - 0001f0d0 [ 1] */, + 0x0f87b009 /* 0001f0f6 - 0001f0ff [ 10] */, + 0x0f8d7037 /* 0001f1ae - 0001f1e5 [ 56] */, + 
0x0f90180c /* 0001f203 - 0001f20f [ 13] */, + 0x0f91e003 /* 0001f23c - 0001f23f [ 4] */, + 0x0f924806 /* 0001f249 - 0001f24f [ 7] */, + 0x0f92900d /* 0001f252 - 0001f25f [ 14] */, + 0x0f933099 /* 0001f266 - 0001f2ff [ 154] */, + 0x0fb6c003 /* 0001f6d8 - 0001f6db [ 4] */, + 0x0fb76802 /* 0001f6ed - 0001f6ef [ 3] */, + 0x0fb7e802 /* 0001f6fd - 0001f6ff [ 3] */, + 0x0fbbb803 /* 0001f777 - 0001f77a [ 4] */, + 0x0fbed005 /* 0001f7da - 0001f7df [ 6] */, + 0x0fbf6003 /* 0001f7ec - 0001f7ef [ 4] */, + 0x0fbf880e /* 0001f7f1 - 0001f7ff [ 15] */, + 0x0fc06003 /* 0001f80c - 0001f80f [ 4] */, + 0x0fc24007 /* 0001f848 - 0001f84f [ 8] */, + 0x0fc2d005 /* 0001f85a - 0001f85f [ 6] */, + 0x0fc44007 /* 0001f888 - 0001f88f [ 8] */, + 0x0fc57001 /* 0001f8ae - 0001f8af [ 2] */, + 0x0fc5904d /* 0001f8b2 - 0001f8ff [ 78] */, + 0x0fd2a00b /* 0001fa54 - 0001fa5f [ 12] */, + 0x0fd37001 /* 0001fa6e - 0001fa6f [ 2] */, + 0x0fd3e802 /* 0001fa7d - 0001fa7f [ 3] */, + 0x0fd44806 /* 0001fa89 - 0001fa8f [ 7] */, + 0x0fd5f000 /* 0001fabe - 0001fabe [ 1] */, + 0x0fd63007 /* 0001fac6 - 0001facd [ 8] */, + 0x0fd6e003 /* 0001fadc - 0001fadf [ 4] */, + 0x0fd74806 /* 0001fae9 - 0001faef [ 7] */, + 0x0fd7c806 /* 0001faf9 - 0001faff [ 7] */, + 0x0fdc9800 /* 0001fb93 - 0001fb93 [ 1] */, + 0x0fde5824 /* 0001fbcb - 0001fbef [ 37] */, + 0x0fdfd405 /* 0001fbfa - 0001ffff [ 1030] */, + 0x1537001f /* 0002a6e0 - 0002a6ff [ 32] */, + 0x15b9d005 /* 0002b73a - 0002b73f [ 6] */, + 0x15c0f001 /* 0002b81e - 0002b81f [ 2] */, + 0x1675100d /* 0002cea2 - 0002ceaf [ 14] */, + 0x175f080e /* 0002ebe1 - 0002ebef [ 15] */, + 0x1772f7ff /* 0002ee5e - 0002f65d [ 2048] */, + 0x17b2f1a1 /* 0002f65e - 0002f7ff [ 418] */, + 0x17d0f5e1 /* 0002fa1e - 0002ffff [ 1506] */, + 0x189a5804 /* 0003134b - 0003134f [ 5] */, + 0x191d87ff /* 000323b0 - 00032baf [ 2048] */, + 0x195d87ff /* 00032bb0 - 000333af [ 2048] */, + 0x199d87ff /* 000333b0 - 00033baf [ 2048] */, + 0x19dd87ff /* 00033bb0 - 000343af [ 2048] */, + 0x1a1d87ff /* 000343b0 - 
00034baf [ 2048] */, + 0x1a5d87ff /* 00034bb0 - 000353af [ 2048] */, + 0x1a9d87ff /* 000353b0 - 00035baf [ 2048] */, + 0x1add87ff /* 00035bb0 - 000363af [ 2048] */, + 0x1b1d87ff /* 000363b0 - 00036baf [ 2048] */, + 0x1b5d87ff /* 00036bb0 - 000373af [ 2048] */, + 0x1b9d87ff /* 000373b0 - 00037baf [ 2048] */, + 0x1bdd87ff /* 00037bb0 - 000383af [ 2048] */, + 0x1c1d87ff /* 000383b0 - 00038baf [ 2048] */, + 0x1c5d87ff /* 00038bb0 - 000393af [ 2048] */, + 0x1c9d87ff /* 000393b0 - 00039baf [ 2048] */, + 0x1cdd87ff /* 00039bb0 - 0003a3af [ 2048] */, + 0x1d1d87ff /* 0003a3b0 - 0003abaf [ 2048] */, + 0x1d5d87ff /* 0003abb0 - 0003b3af [ 2048] */, + 0x1d9d87ff /* 0003b3b0 - 0003bbaf [ 2048] */, + 0x1ddd87ff /* 0003bbb0 - 0003c3af [ 2048] */, + 0x1e1d87ff /* 0003c3b0 - 0003cbaf [ 2048] */, + 0x1e5d87ff /* 0003cbb0 - 0003d3af [ 2048] */, + 0x1e9d87ff /* 0003d3b0 - 0003dbaf [ 2048] */, + 0x1edd87ff /* 0003dbb0 - 0003e3af [ 2048] */, + 0x1f1d87ff /* 0003e3b0 - 0003ebaf [ 2048] */, + 0x1f5d87ff /* 0003ebb0 - 0003f3af [ 2048] */, + 0x1f9d87ff /* 0003f3b0 - 0003fbaf [ 2048] */, + 0x1fdd87ff /* 0003fbb0 - 000403af [ 2048] */, + 0x201d87ff /* 000403b0 - 00040baf [ 2048] */, + 0x205d87ff /* 00040bb0 - 000413af [ 2048] */, + 0x209d87ff /* 000413b0 - 00041baf [ 2048] */, + 0x20dd87ff /* 00041bb0 - 000423af [ 2048] */, + 0x211d87ff /* 000423b0 - 00042baf [ 2048] */, + 0x215d87ff /* 00042bb0 - 000433af [ 2048] */, + 0x219d87ff /* 000433b0 - 00043baf [ 2048] */, + 0x21dd87ff /* 00043bb0 - 000443af [ 2048] */, + 0x221d87ff /* 000443b0 - 00044baf [ 2048] */, + 0x225d87ff /* 00044bb0 - 000453af [ 2048] */, + 0x229d87ff /* 000453b0 - 00045baf [ 2048] */, + 0x22dd87ff /* 00045bb0 - 000463af [ 2048] */, + 0x231d87ff /* 000463b0 - 00046baf [ 2048] */, + 0x235d87ff /* 00046bb0 - 000473af [ 2048] */, + 0x239d87ff /* 000473b0 - 00047baf [ 2048] */, + 0x23dd87ff /* 00047bb0 - 000483af [ 2048] */, + 0x241d87ff /* 000483b0 - 00048baf [ 2048] */, + 0x245d87ff /* 00048bb0 - 000493af [ 2048] */, + 
0x249d87ff /* 000493b0 - 00049baf [ 2048] */, + 0x24dd87ff /* 00049bb0 - 0004a3af [ 2048] */, + 0x251d87ff /* 0004a3b0 - 0004abaf [ 2048] */, + 0x255d87ff /* 0004abb0 - 0004b3af [ 2048] */, + 0x259d87ff /* 0004b3b0 - 0004bbaf [ 2048] */, + 0x25dd87ff /* 0004bbb0 - 0004c3af [ 2048] */, + 0x261d87ff /* 0004c3b0 - 0004cbaf [ 2048] */, + 0x265d87ff /* 0004cbb0 - 0004d3af [ 2048] */, + 0x269d87ff /* 0004d3b0 - 0004dbaf [ 2048] */, + 0x26dd87ff /* 0004dbb0 - 0004e3af [ 2048] */, + 0x271d87ff /* 0004e3b0 - 0004ebaf [ 2048] */, + 0x275d87ff /* 0004ebb0 - 0004f3af [ 2048] */, + 0x279d87ff /* 0004f3b0 - 0004fbaf [ 2048] */, + 0x27dd87ff /* 0004fbb0 - 000503af [ 2048] */, + 0x281d87ff /* 000503b0 - 00050baf [ 2048] */, + 0x285d87ff /* 00050bb0 - 000513af [ 2048] */, + 0x289d87ff /* 000513b0 - 00051baf [ 2048] */, + 0x28dd87ff /* 00051bb0 - 000523af [ 2048] */, + 0x291d87ff /* 000523b0 - 00052baf [ 2048] */, + 0x295d87ff /* 00052bb0 - 000533af [ 2048] */, + 0x299d87ff /* 000533b0 - 00053baf [ 2048] */, + 0x29dd87ff /* 00053bb0 - 000543af [ 2048] */, + 0x2a1d87ff /* 000543b0 - 00054baf [ 2048] */, + 0x2a5d87ff /* 00054bb0 - 000553af [ 2048] */, + 0x2a9d87ff /* 000553b0 - 00055baf [ 2048] */, + 0x2add87ff /* 00055bb0 - 000563af [ 2048] */, + 0x2b1d87ff /* 000563b0 - 00056baf [ 2048] */, + 0x2b5d87ff /* 00056bb0 - 000573af [ 2048] */, + 0x2b9d87ff /* 000573b0 - 00057baf [ 2048] */, + 0x2bdd87ff /* 00057bb0 - 000583af [ 2048] */, + 0x2c1d87ff /* 000583b0 - 00058baf [ 2048] */, + 0x2c5d87ff /* 00058bb0 - 000593af [ 2048] */, + 0x2c9d87ff /* 000593b0 - 00059baf [ 2048] */, + 0x2cdd87ff /* 00059bb0 - 0005a3af [ 2048] */, + 0x2d1d87ff /* 0005a3b0 - 0005abaf [ 2048] */, + 0x2d5d87ff /* 0005abb0 - 0005b3af [ 2048] */, + 0x2d9d87ff /* 0005b3b0 - 0005bbaf [ 2048] */, + 0x2ddd87ff /* 0005bbb0 - 0005c3af [ 2048] */, + 0x2e1d87ff /* 0005c3b0 - 0005cbaf [ 2048] */, + 0x2e5d87ff /* 0005cbb0 - 0005d3af [ 2048] */, + 0x2e9d87ff /* 0005d3b0 - 0005dbaf [ 2048] */, + 0x2edd87ff /* 0005dbb0 - 
0005e3af [ 2048] */, + 0x2f1d87ff /* 0005e3b0 - 0005ebaf [ 2048] */, + 0x2f5d87ff /* 0005ebb0 - 0005f3af [ 2048] */, + 0x2f9d87ff /* 0005f3b0 - 0005fbaf [ 2048] */, + 0x2fdd87ff /* 0005fbb0 - 000603af [ 2048] */, + 0x301d87ff /* 000603b0 - 00060baf [ 2048] */, + 0x305d87ff /* 00060bb0 - 000613af [ 2048] */, + 0x309d87ff /* 000613b0 - 00061baf [ 2048] */, + 0x30dd87ff /* 00061bb0 - 000623af [ 2048] */, + 0x311d87ff /* 000623b0 - 00062baf [ 2048] */, + 0x315d87ff /* 00062bb0 - 000633af [ 2048] */, + 0x319d87ff /* 000633b0 - 00063baf [ 2048] */, + 0x31dd87ff /* 00063bb0 - 000643af [ 2048] */, + 0x321d87ff /* 000643b0 - 00064baf [ 2048] */, + 0x325d87ff /* 00064bb0 - 000653af [ 2048] */, + 0x329d87ff /* 000653b0 - 00065baf [ 2048] */, + 0x32dd87ff /* 00065bb0 - 000663af [ 2048] */, + 0x331d87ff /* 000663b0 - 00066baf [ 2048] */, + 0x335d87ff /* 00066bb0 - 000673af [ 2048] */, + 0x339d87ff /* 000673b0 - 00067baf [ 2048] */, + 0x33dd87ff /* 00067bb0 - 000683af [ 2048] */, + 0x341d87ff /* 000683b0 - 00068baf [ 2048] */, + 0x345d87ff /* 00068bb0 - 000693af [ 2048] */, + 0x349d87ff /* 000693b0 - 00069baf [ 2048] */, + 0x34dd87ff /* 00069bb0 - 0006a3af [ 2048] */, + 0x351d87ff /* 0006a3b0 - 0006abaf [ 2048] */, + 0x355d87ff /* 0006abb0 - 0006b3af [ 2048] */, + 0x359d87ff /* 0006b3b0 - 0006bbaf [ 2048] */, + 0x35dd87ff /* 0006bbb0 - 0006c3af [ 2048] */, + 0x361d87ff /* 0006c3b0 - 0006cbaf [ 2048] */, + 0x365d87ff /* 0006cbb0 - 0006d3af [ 2048] */, + 0x369d87ff /* 0006d3b0 - 0006dbaf [ 2048] */, + 0x36dd87ff /* 0006dbb0 - 0006e3af [ 2048] */, + 0x371d87ff /* 0006e3b0 - 0006ebaf [ 2048] */, + 0x375d87ff /* 0006ebb0 - 0006f3af [ 2048] */, + 0x379d87ff /* 0006f3b0 - 0006fbaf [ 2048] */, + 0x37dd87ff /* 0006fbb0 - 000703af [ 2048] */, + 0x381d87ff /* 000703b0 - 00070baf [ 2048] */, + 0x385d87ff /* 00070bb0 - 000713af [ 2048] */, + 0x389d87ff /* 000713b0 - 00071baf [ 2048] */, + 0x38dd87ff /* 00071bb0 - 000723af [ 2048] */, + 0x391d87ff /* 000723b0 - 00072baf [ 2048] */, + 
0x395d87ff /* 00072bb0 - 000733af [ 2048] */, + 0x399d87ff /* 000733b0 - 00073baf [ 2048] */, + 0x39dd87ff /* 00073bb0 - 000743af [ 2048] */, + 0x3a1d87ff /* 000743b0 - 00074baf [ 2048] */, + 0x3a5d87ff /* 00074bb0 - 000753af [ 2048] */, + 0x3a9d87ff /* 000753b0 - 00075baf [ 2048] */, + 0x3add87ff /* 00075bb0 - 000763af [ 2048] */, + 0x3b1d87ff /* 000763b0 - 00076baf [ 2048] */, + 0x3b5d87ff /* 00076bb0 - 000773af [ 2048] */, + 0x3b9d87ff /* 000773b0 - 00077baf [ 2048] */, + 0x3bdd87ff /* 00077bb0 - 000783af [ 2048] */, + 0x3c1d87ff /* 000783b0 - 00078baf [ 2048] */, + 0x3c5d87ff /* 00078bb0 - 000793af [ 2048] */, + 0x3c9d87ff /* 000793b0 - 00079baf [ 2048] */, + 0x3cdd87ff /* 00079bb0 - 0007a3af [ 2048] */, + 0x3d1d87ff /* 0007a3b0 - 0007abaf [ 2048] */, + 0x3d5d87ff /* 0007abb0 - 0007b3af [ 2048] */, + 0x3d9d87ff /* 0007b3b0 - 0007bbaf [ 2048] */, + 0x3ddd87ff /* 0007bbb0 - 0007c3af [ 2048] */, + 0x3e1d87ff /* 0007c3b0 - 0007cbaf [ 2048] */, + 0x3e5d87ff /* 0007cbb0 - 0007d3af [ 2048] */, + 0x3e9d87ff /* 0007d3b0 - 0007dbaf [ 2048] */, + 0x3edd87ff /* 0007dbb0 - 0007e3af [ 2048] */, + 0x3f1d87ff /* 0007e3b0 - 0007ebaf [ 2048] */, + 0x3f5d87ff /* 0007ebb0 - 0007f3af [ 2048] */, + 0x3f9d87ff /* 0007f3b0 - 0007fbaf [ 2048] */, + 0x3fdd87ff /* 0007fbb0 - 000803af [ 2048] */, + 0x401d87ff /* 000803b0 - 00080baf [ 2048] */, + 0x405d87ff /* 00080bb0 - 000813af [ 2048] */, + 0x409d87ff /* 000813b0 - 00081baf [ 2048] */, + 0x40dd87ff /* 00081bb0 - 000823af [ 2048] */, + 0x411d87ff /* 000823b0 - 00082baf [ 2048] */, + 0x415d87ff /* 00082bb0 - 000833af [ 2048] */, + 0x419d87ff /* 000833b0 - 00083baf [ 2048] */, + 0x41dd87ff /* 00083bb0 - 000843af [ 2048] */, + 0x421d87ff /* 000843b0 - 00084baf [ 2048] */, + 0x425d87ff /* 00084bb0 - 000853af [ 2048] */, + 0x429d87ff /* 000853b0 - 00085baf [ 2048] */, + 0x42dd87ff /* 00085bb0 - 000863af [ 2048] */, + 0x431d87ff /* 000863b0 - 00086baf [ 2048] */, + 0x435d87ff /* 00086bb0 - 000873af [ 2048] */, + 0x439d87ff /* 000873b0 - 
00087baf [ 2048] */, + 0x43dd87ff /* 00087bb0 - 000883af [ 2048] */, + 0x441d87ff /* 000883b0 - 00088baf [ 2048] */, + 0x445d87ff /* 00088bb0 - 000893af [ 2048] */, + 0x449d87ff /* 000893b0 - 00089baf [ 2048] */, + 0x44dd87ff /* 00089bb0 - 0008a3af [ 2048] */, + 0x451d87ff /* 0008a3b0 - 0008abaf [ 2048] */, + 0x455d87ff /* 0008abb0 - 0008b3af [ 2048] */, + 0x459d87ff /* 0008b3b0 - 0008bbaf [ 2048] */, + 0x45dd87ff /* 0008bbb0 - 0008c3af [ 2048] */, + 0x461d87ff /* 0008c3b0 - 0008cbaf [ 2048] */, + 0x465d87ff /* 0008cbb0 - 0008d3af [ 2048] */, + 0x469d87ff /* 0008d3b0 - 0008dbaf [ 2048] */, + 0x46dd87ff /* 0008dbb0 - 0008e3af [ 2048] */, + 0x471d87ff /* 0008e3b0 - 0008ebaf [ 2048] */, + 0x475d87ff /* 0008ebb0 - 0008f3af [ 2048] */, + 0x479d87ff /* 0008f3b0 - 0008fbaf [ 2048] */, + 0x47dd87ff /* 0008fbb0 - 000903af [ 2048] */, + 0x481d87ff /* 000903b0 - 00090baf [ 2048] */, + 0x485d87ff /* 00090bb0 - 000913af [ 2048] */, + 0x489d87ff /* 000913b0 - 00091baf [ 2048] */, + 0x48dd87ff /* 00091bb0 - 000923af [ 2048] */, + 0x491d87ff /* 000923b0 - 00092baf [ 2048] */, + 0x495d87ff /* 00092bb0 - 000933af [ 2048] */, + 0x499d87ff /* 000933b0 - 00093baf [ 2048] */, + 0x49dd87ff /* 00093bb0 - 000943af [ 2048] */, + 0x4a1d87ff /* 000943b0 - 00094baf [ 2048] */, + 0x4a5d87ff /* 00094bb0 - 000953af [ 2048] */, + 0x4a9d87ff /* 000953b0 - 00095baf [ 2048] */, + 0x4add87ff /* 00095bb0 - 000963af [ 2048] */, + 0x4b1d87ff /* 000963b0 - 00096baf [ 2048] */, + 0x4b5d87ff /* 00096bb0 - 000973af [ 2048] */, + 0x4b9d87ff /* 000973b0 - 00097baf [ 2048] */, + 0x4bdd87ff /* 00097bb0 - 000983af [ 2048] */, + 0x4c1d87ff /* 000983b0 - 00098baf [ 2048] */, + 0x4c5d87ff /* 00098bb0 - 000993af [ 2048] */, + 0x4c9d87ff /* 000993b0 - 00099baf [ 2048] */, + 0x4cdd87ff /* 00099bb0 - 0009a3af [ 2048] */, + 0x4d1d87ff /* 0009a3b0 - 0009abaf [ 2048] */, + 0x4d5d87ff /* 0009abb0 - 0009b3af [ 2048] */, + 0x4d9d87ff /* 0009b3b0 - 0009bbaf [ 2048] */, + 0x4ddd87ff /* 0009bbb0 - 0009c3af [ 2048] */, + 
0x4e1d87ff /* 0009c3b0 - 0009cbaf [ 2048] */, + 0x4e5d87ff /* 0009cbb0 - 0009d3af [ 2048] */, + 0x4e9d87ff /* 0009d3b0 - 0009dbaf [ 2048] */, + 0x4edd87ff /* 0009dbb0 - 0009e3af [ 2048] */, + 0x4f1d87ff /* 0009e3b0 - 0009ebaf [ 2048] */, + 0x4f5d87ff /* 0009ebb0 - 0009f3af [ 2048] */, + 0x4f9d87ff /* 0009f3b0 - 0009fbaf [ 2048] */, + 0x4fdd87ff /* 0009fbb0 - 000a03af [ 2048] */, + 0x501d87ff /* 000a03b0 - 000a0baf [ 2048] */, + 0x505d87ff /* 000a0bb0 - 000a13af [ 2048] */, + 0x509d87ff /* 000a13b0 - 000a1baf [ 2048] */, + 0x50dd87ff /* 000a1bb0 - 000a23af [ 2048] */, + 0x511d87ff /* 000a23b0 - 000a2baf [ 2048] */, + 0x515d87ff /* 000a2bb0 - 000a33af [ 2048] */, + 0x519d87ff /* 000a33b0 - 000a3baf [ 2048] */, + 0x51dd87ff /* 000a3bb0 - 000a43af [ 2048] */, + 0x521d87ff /* 000a43b0 - 000a4baf [ 2048] */, + 0x525d87ff /* 000a4bb0 - 000a53af [ 2048] */, + 0x529d87ff /* 000a53b0 - 000a5baf [ 2048] */, + 0x52dd87ff /* 000a5bb0 - 000a63af [ 2048] */, + 0x531d87ff /* 000a63b0 - 000a6baf [ 2048] */, + 0x535d87ff /* 000a6bb0 - 000a73af [ 2048] */, + 0x539d87ff /* 000a73b0 - 000a7baf [ 2048] */, + 0x53dd87ff /* 000a7bb0 - 000a83af [ 2048] */, + 0x541d87ff /* 000a83b0 - 000a8baf [ 2048] */, + 0x545d87ff /* 000a8bb0 - 000a93af [ 2048] */, + 0x549d87ff /* 000a93b0 - 000a9baf [ 2048] */, + 0x54dd87ff /* 000a9bb0 - 000aa3af [ 2048] */, + 0x551d87ff /* 000aa3b0 - 000aabaf [ 2048] */, + 0x555d87ff /* 000aabb0 - 000ab3af [ 2048] */, + 0x559d87ff /* 000ab3b0 - 000abbaf [ 2048] */, + 0x55dd87ff /* 000abbb0 - 000ac3af [ 2048] */, + 0x561d87ff /* 000ac3b0 - 000acbaf [ 2048] */, + 0x565d87ff /* 000acbb0 - 000ad3af [ 2048] */, + 0x569d87ff /* 000ad3b0 - 000adbaf [ 2048] */, + 0x56dd87ff /* 000adbb0 - 000ae3af [ 2048] */, + 0x571d87ff /* 000ae3b0 - 000aebaf [ 2048] */, + 0x575d87ff /* 000aebb0 - 000af3af [ 2048] */, + 0x579d87ff /* 000af3b0 - 000afbaf [ 2048] */, + 0x57dd87ff /* 000afbb0 - 000b03af [ 2048] */, + 0x581d87ff /* 000b03b0 - 000b0baf [ 2048] */, + 0x585d87ff /* 000b0bb0 - 
000b13af [ 2048] */, + 0x589d87ff /* 000b13b0 - 000b1baf [ 2048] */, + 0x58dd87ff /* 000b1bb0 - 000b23af [ 2048] */, + 0x591d87ff /* 000b23b0 - 000b2baf [ 2048] */, + 0x595d87ff /* 000b2bb0 - 000b33af [ 2048] */, + 0x599d87ff /* 000b33b0 - 000b3baf [ 2048] */, + 0x59dd87ff /* 000b3bb0 - 000b43af [ 2048] */, + 0x5a1d87ff /* 000b43b0 - 000b4baf [ 2048] */, + 0x5a5d87ff /* 000b4bb0 - 000b53af [ 2048] */, + 0x5a9d87ff /* 000b53b0 - 000b5baf [ 2048] */, + 0x5add87ff /* 000b5bb0 - 000b63af [ 2048] */, + 0x5b1d87ff /* 000b63b0 - 000b6baf [ 2048] */, + 0x5b5d87ff /* 000b6bb0 - 000b73af [ 2048] */, + 0x5b9d87ff /* 000b73b0 - 000b7baf [ 2048] */, + 0x5bdd87ff /* 000b7bb0 - 000b83af [ 2048] */, + 0x5c1d87ff /* 000b83b0 - 000b8baf [ 2048] */, + 0x5c5d87ff /* 000b8bb0 - 000b93af [ 2048] */, + 0x5c9d87ff /* 000b93b0 - 000b9baf [ 2048] */, + 0x5cdd87ff /* 000b9bb0 - 000ba3af [ 2048] */, + 0x5d1d87ff /* 000ba3b0 - 000babaf [ 2048] */, + 0x5d5d87ff /* 000babb0 - 000bb3af [ 2048] */, + 0x5d9d87ff /* 000bb3b0 - 000bbbaf [ 2048] */, + 0x5ddd87ff /* 000bbbb0 - 000bc3af [ 2048] */, + 0x5e1d87ff /* 000bc3b0 - 000bcbaf [ 2048] */, + 0x5e5d87ff /* 000bcbb0 - 000bd3af [ 2048] */, + 0x5e9d87ff /* 000bd3b0 - 000bdbaf [ 2048] */, + 0x5edd87ff /* 000bdbb0 - 000be3af [ 2048] */, + 0x5f1d87ff /* 000be3b0 - 000bebaf [ 2048] */, + 0x5f5d87ff /* 000bebb0 - 000bf3af [ 2048] */, + 0x5f9d87ff /* 000bf3b0 - 000bfbaf [ 2048] */, + 0x5fdd87ff /* 000bfbb0 - 000c03af [ 2048] */, + 0x601d87ff /* 000c03b0 - 000c0baf [ 2048] */, + 0x605d87ff /* 000c0bb0 - 000c13af [ 2048] */, + 0x609d87ff /* 000c13b0 - 000c1baf [ 2048] */, + 0x60dd87ff /* 000c1bb0 - 000c23af [ 2048] */, + 0x611d87ff /* 000c23b0 - 000c2baf [ 2048] */, + 0x615d87ff /* 000c2bb0 - 000c33af [ 2048] */, + 0x619d87ff /* 000c33b0 - 000c3baf [ 2048] */, + 0x61dd87ff /* 000c3bb0 - 000c43af [ 2048] */, + 0x621d87ff /* 000c43b0 - 000c4baf [ 2048] */, + 0x625d87ff /* 000c4bb0 - 000c53af [ 2048] */, + 0x629d87ff /* 000c53b0 - 000c5baf [ 2048] */, + 
0x62dd87ff /* 000c5bb0 - 000c63af [ 2048] */, + 0x631d87ff /* 000c63b0 - 000c6baf [ 2048] */, + 0x635d87ff /* 000c6bb0 - 000c73af [ 2048] */, + 0x639d87ff /* 000c73b0 - 000c7baf [ 2048] */, + 0x63dd87ff /* 000c7bb0 - 000c83af [ 2048] */, + 0x641d87ff /* 000c83b0 - 000c8baf [ 2048] */, + 0x645d87ff /* 000c8bb0 - 000c93af [ 2048] */, + 0x649d87ff /* 000c93b0 - 000c9baf [ 2048] */, + 0x64dd87ff /* 000c9bb0 - 000ca3af [ 2048] */, + 0x651d87ff /* 000ca3b0 - 000cabaf [ 2048] */, + 0x655d87ff /* 000cabb0 - 000cb3af [ 2048] */, + 0x659d87ff /* 000cb3b0 - 000cbbaf [ 2048] */, + 0x65dd87ff /* 000cbbb0 - 000cc3af [ 2048] */, + 0x661d87ff /* 000cc3b0 - 000ccbaf [ 2048] */, + 0x665d87ff /* 000ccbb0 - 000cd3af [ 2048] */, + 0x669d87ff /* 000cd3b0 - 000cdbaf [ 2048] */, + 0x66dd87ff /* 000cdbb0 - 000ce3af [ 2048] */, + 0x671d87ff /* 000ce3b0 - 000cebaf [ 2048] */, + 0x675d87ff /* 000cebb0 - 000cf3af [ 2048] */, + 0x679d87ff /* 000cf3b0 - 000cfbaf [ 2048] */, + 0x67dd87ff /* 000cfbb0 - 000d03af [ 2048] */, + 0x681d87ff /* 000d03b0 - 000d0baf [ 2048] */, + 0x685d87ff /* 000d0bb0 - 000d13af [ 2048] */, + 0x689d87ff /* 000d13b0 - 000d1baf [ 2048] */, + 0x68dd87ff /* 000d1bb0 - 000d23af [ 2048] */, + 0x691d87ff /* 000d23b0 - 000d2baf [ 2048] */, + 0x695d87ff /* 000d2bb0 - 000d33af [ 2048] */, + 0x699d87ff /* 000d33b0 - 000d3baf [ 2048] */, + 0x69dd87ff /* 000d3bb0 - 000d43af [ 2048] */, + 0x6a1d87ff /* 000d43b0 - 000d4baf [ 2048] */, + 0x6a5d87ff /* 000d4bb0 - 000d53af [ 2048] */, + 0x6a9d87ff /* 000d53b0 - 000d5baf [ 2048] */, + 0x6add87ff /* 000d5bb0 - 000d63af [ 2048] */, + 0x6b1d87ff /* 000d63b0 - 000d6baf [ 2048] */, + 0x6b5d87ff /* 000d6bb0 - 000d73af [ 2048] */, + 0x6b9d87ff /* 000d73b0 - 000d7baf [ 2048] */, + 0x6bdd87ff /* 000d7bb0 - 000d83af [ 2048] */, + 0x6c1d87ff /* 000d83b0 - 000d8baf [ 2048] */, + 0x6c5d87ff /* 000d8bb0 - 000d93af [ 2048] */, + 0x6c9d87ff /* 000d93b0 - 000d9baf [ 2048] */, + 0x6cdd87ff /* 000d9bb0 - 000da3af [ 2048] */, + 0x6d1d87ff /* 000da3b0 - 
000dabaf [ 2048] */, + 0x6d5d87ff /* 000dabb0 - 000db3af [ 2048] */, + 0x6d9d87ff /* 000db3b0 - 000dbbaf [ 2048] */, + 0x6ddd87ff /* 000dbbb0 - 000dc3af [ 2048] */, + 0x6e1d87ff /* 000dc3b0 - 000dcbaf [ 2048] */, + 0x6e5d87ff /* 000dcbb0 - 000dd3af [ 2048] */, + 0x6e9d87ff /* 000dd3b0 - 000ddbaf [ 2048] */, + 0x6edd87ff /* 000ddbb0 - 000de3af [ 2048] */, + 0x6f1d87ff /* 000de3b0 - 000debaf [ 2048] */, + 0x6f5d87ff /* 000debb0 - 000df3af [ 2048] */, + 0x6f9d87ff /* 000df3b0 - 000dfbaf [ 2048] */, + 0x6fdd854f /* 000dfbb0 - 000e00ff [ 1360] */}; /// At the end of the valid Unicode code points space a lot of code points are /// either reserved or a noncharacter. Adding all these entries to the /// lookup table would add 446 entries to the table (in Unicode 14). /// Instead the only the start of the region is stored, every code point in /// this region needs to be escaped. -inline constexpr uint32_t __unallocated_region_lower_bound = 0x000323b0; +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __unallocated_region_lower_bound = 0x000e01f0; /// Returns whether the code unit needs to be escaped. /// diff --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h index 43a074dd8d7002..052ea98c3c3b8c 100644 --- a/libcxx/include/__format/write_escaped.h +++ b/libcxx/include/__format/write_escaped.h @@ -101,15 +101,27 @@ _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_Cha } template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool +__is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) { # ifdef _LIBCPP_HAS_NO_UNICODE // For ASCII assume everything above 127 is printable. 
if (__value > 127) return false; # endif + // [format.string.escaped]/2.2.1.2.1 + // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar + // value whose Unicode property General_Category has a value in the groups + // Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard, if (!__escaped_output_table::__needs_escape(__value)) - return false; + // [format.string.escaped]/2.2.1.2.2 + // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar + // value with the Unicode property Grapheme_Extend=Yes as described by UAX + // #44 of the Unicode Standard and C is not immediately preceded in S by a + // character P appended to E without translation to an escape sequence, + if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) != + __extended_grapheme_custer_property_boundary::__property::__Extend) + return false; __formatter::__write_well_formed_escaped_code_unit(__str, __value); return true; @@ -124,8 +136,8 @@ enum class __escape_quotation_mark { __apostrophe, __double_quote }; // [format.string.escaped]/2 template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool -__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written( + basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) { // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] switch (__value) { case _CharT('\t'): @@ -167,7 +179,7 @@ __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __e // TODO FMT determine what to do with shift sequences. 
// 2.2.1.2.1 and 2.2.1.2.2 - Escape - return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value)); + return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value)); } template @@ -175,11 +187,15 @@ _LIBCPP_HIDE_FROM_ABI void __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; + // When the first code unit has the property Grapheme_Extend=Yes it needs to + // be escaped. This happens when the previous code unit was also escaped. + bool __escape = true; while (!__view.__at_end()) { auto __first = __view.__position(); typename __unicode::__consume_result __result = __view.__consume(); if (__result.__status == __unicode::__consume_result::__ok) { - if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark)) + __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark); + if (!__escape) // 2.2.1.3 - Add the character ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); } else { diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf index 7964758c908f4c..a5b4ab9520aedb 100644 --- a/libcxx/include/streambuf +++ b/libcxx/include/streambuf @@ -107,10 +107,12 @@ protected: */ +#include <__assert> #include <__config> #include <__fwd/streambuf.h> #include <__locale> #include <__type_traits/is_same.h> +#include <__utility/is_valid_range.h> #include #include #include @@ -234,6 +236,9 @@ protected: inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; } inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range"); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), 
"[gbeg, gend) must be a valid range"); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gnext, __gend), "[gnext, gend) must be a valid range"); __binp_ = __gbeg; __ninp_ = __gnext; __einp_ = __gend; @@ -249,6 +254,7 @@ protected: _LIBCPP_HIDE_FROM_ABI void __pbump(streamsize __n) { __nout_ += __n; } inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setp(char_type* __pbeg, char_type* __pend) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__pbeg, __pend), "[pbeg, pend) must be a valid range"); __bout_ = __nout_ = __pbeg; __eout_ = __pend; } diff --git a/libcxx/include/variant b/libcxx/include/variant index 858a49b980bd9a..34150bd452842e 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -657,6 +657,10 @@ private: } // namespace __visitation +// Adding semi-colons in macro expansions helps clang-format to do a better job. +// This macro is used to avoid compilation errors due to "stray" semi-colons. +# define _LIBCPP_EAT_SEMICOLON static_assert(true, "") + template struct _LIBCPP_TEMPLATE_VIS __alt { using __value_type = _Tp; @@ -691,11 +695,10 @@ union _LIBCPP_TEMPLATE_VIS __union<_DestructibleTrait, _Index> {}; __union(const __union&) = default; \ __union(__union&&) = default; \ \ - destructor \ + destructor; \ \ - __union& \ - operator=(const __union&) = default; \ - __union& operator=(__union&&) = default; \ + __union& operator=(const __union&) = default; \ + __union& operator=(__union&&) = default; \ \ private: \ char __dummy; \ @@ -705,9 +708,10 @@ union _LIBCPP_TEMPLATE_VIS __union<_DestructibleTrait, _Index> {}; friend struct __access::__union; \ } -_LIBCPP_VARIANT_UNION(_Trait::_TriviallyAvailable, ~__union() = default;); -_LIBCPP_VARIANT_UNION(_Trait::_Available, ~__union(){}); -_LIBCPP_VARIANT_UNION(_Trait::_Unavailable, ~__union() = delete;); +_LIBCPP_VARIANT_UNION(_Trait::_TriviallyAvailable, ~__union() = default); +_LIBCPP_VARIANT_UNION( + _Trait::_Available, _LIBCPP_HIDE_FROM_ABI ~__union() {} _LIBCPP_EAT_SEMICOLON); 
+_LIBCPP_VARIANT_UNION(_Trait::_Unavailable, ~__union() = delete); # undef _LIBCPP_VARIANT_UNION @@ -761,23 +765,27 @@ class _LIBCPP_TEMPLATE_VIS __dtor; using __base_type::__base_type; \ using __base_type::operator=; \ \ - __dtor(const __dtor&) = default; \ - __dtor(__dtor&&) = default; \ - destructor __dtor& operator=(const __dtor&) = default; \ - __dtor& operator=(__dtor&&) = default; \ + __dtor(const __dtor&) = default; \ + __dtor(__dtor&&) = default; \ + __dtor& operator=(const __dtor&) = default; \ + __dtor& operator=(__dtor&&) = default; \ + destructor; \ \ protected: \ - inline _LIBCPP_HIDE_FROM_ABI destroy \ + inline _LIBCPP_HIDE_FROM_ABI destroy; \ } _LIBCPP_VARIANT_DESTRUCTOR( - _Trait::_TriviallyAvailable, ~__dtor() = default; - , void __destroy() noexcept { this->__index = __variant_npos<__index_t>; }); + _Trait::_TriviallyAvailable, + ~__dtor() = default, // + _LIBCPP_HIDE_FROM_ABI void __destroy() noexcept { + this->__index = __variant_npos<__index_t>; + } _LIBCPP_EAT_SEMICOLON); _LIBCPP_VARIANT_DESTRUCTOR( _Trait::_Available, - ~__dtor() { __destroy(); }, - void __destroy() noexcept { + _LIBCPP_HIDE_FROM_ABI ~__dtor() { __destroy(); } _LIBCPP_EAT_SEMICOLON, + _LIBCPP_HIDE_FROM_ABI void __destroy() noexcept { if (!this->valueless_by_exception()) { __visitation::__base::__visit_alt( [](auto& __alt) noexcept { @@ -787,9 +795,9 @@ _LIBCPP_VARIANT_DESTRUCTOR( *this); } this->__index = __variant_npos<__index_t>; - }); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_DESTRUCTOR(_Trait::_Unavailable, ~__dtor() = delete;, void __destroy() noexcept = delete;); +_LIBCPP_VARIANT_DESTRUCTOR(_Trait::_Unavailable, ~__dtor() = delete, void __destroy() noexcept = delete); # undef _LIBCPP_VARIANT_DESTRUCTOR @@ -839,20 +847,24 @@ class _LIBCPP_TEMPLATE_VIS __move_constructor; using __base_type::operator=; \ \ __move_constructor(const __move_constructor&) = default; \ - move_constructor ~__move_constructor() = default; \ + ~__move_constructor() = default; \ 
__move_constructor& operator=(const __move_constructor&) = default; \ __move_constructor& operator=(__move_constructor&&) = default; \ + move_constructor; \ } _LIBCPP_VARIANT_MOVE_CONSTRUCTOR(_Trait::_TriviallyAvailable, - __move_constructor(__move_constructor&& __that) = default;); + __move_constructor(__move_constructor&& __that) = default); _LIBCPP_VARIANT_MOVE_CONSTRUCTOR( _Trait::_Available, - __move_constructor(__move_constructor&& __that) noexcept(__all...>::value) - : __move_constructor(__valueless_t{}) { this->__generic_construct(*this, std::move(__that)); }); + _LIBCPP_HIDE_FROM_ABI __move_constructor(__move_constructor&& __that) noexcept( + __all...>::value) + : __move_constructor(__valueless_t{}) { + this->__generic_construct(*this, std::move(__that)); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_MOVE_CONSTRUCTOR(_Trait::_Unavailable, __move_constructor(__move_constructor&&) = delete;); +_LIBCPP_VARIANT_MOVE_CONSTRUCTOR(_Trait::_Unavailable, __move_constructor(__move_constructor&&) = delete); # undef _LIBCPP_VARIANT_MOVE_CONSTRUCTOR @@ -869,20 +881,21 @@ class _LIBCPP_TEMPLATE_VIS __copy_constructor; using __base_type::__base_type; \ using __base_type::operator=; \ \ - copy_constructor __copy_constructor(__copy_constructor&&) = default; \ - ~__copy_constructor() = default; \ - __copy_constructor& operator=(const __copy_constructor&) = default; \ - __copy_constructor& operator=(__copy_constructor&&) = default; \ - } + __copy_constructor(__copy_constructor&&) = default; \ + ~__copy_constructor() = default; \ + __copy_constructor& operator=(const __copy_constructor&) = default; \ + __copy_constructor& operator=(__copy_constructor&&) = default; \ + copy_constructor; \ + } // namespace __variant_detail _LIBCPP_VARIANT_COPY_CONSTRUCTOR(_Trait::_TriviallyAvailable, - __copy_constructor(const __copy_constructor& __that) = default;); + __copy_constructor(const __copy_constructor& __that) = default); _LIBCPP_VARIANT_COPY_CONSTRUCTOR( - _Trait::_Available, 
__copy_constructor(const __copy_constructor& __that) - : __copy_constructor(__valueless_t{}) { this->__generic_construct(*this, __that); }); + _Trait::_Available, _LIBCPP_HIDE_FROM_ABI __copy_constructor(const __copy_constructor& __that) + : __copy_constructor(__valueless_t{}) { this->__generic_construct(*this, __that); } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_COPY_CONSTRUCTOR(_Trait::_Unavailable, __copy_constructor(const __copy_constructor&) = delete;); +_LIBCPP_VARIANT_COPY_CONSTRUCTOR(_Trait::_Unavailable, __copy_constructor(const __copy_constructor&) = delete); # undef _LIBCPP_VARIANT_COPY_CONSTRUCTOR @@ -955,22 +968,22 @@ class _LIBCPP_TEMPLATE_VIS __move_assignment; __move_assignment(__move_assignment&&) = default; \ ~__move_assignment() = default; \ __move_assignment& operator=(const __move_assignment&) = default; \ - move_assignment \ + move_assignment; \ } _LIBCPP_VARIANT_MOVE_ASSIGNMENT(_Trait::_TriviallyAvailable, - __move_assignment& operator=(__move_assignment&& __that) = default;); + __move_assignment& operator=(__move_assignment&& __that) = default); _LIBCPP_VARIANT_MOVE_ASSIGNMENT( _Trait::_Available, - __move_assignment& + _LIBCPP_HIDE_FROM_ABI __move_assignment& operator=(__move_assignment&& __that) noexcept( __all<(is_nothrow_move_constructible_v<_Types> && is_nothrow_move_assignable_v<_Types>)...>::value) { this->__generic_assign(std::move(__that)); return *this; - }); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_MOVE_ASSIGNMENT(_Trait::_Unavailable, __move_assignment& operator=(__move_assignment&&) = delete;); +_LIBCPP_VARIANT_MOVE_ASSIGNMENT(_Trait::_Unavailable, __move_assignment& operator=(__move_assignment&&) = delete); # undef _LIBCPP_VARIANT_MOVE_ASSIGNMENT @@ -987,22 +1000,23 @@ class _LIBCPP_TEMPLATE_VIS __copy_assignment; using __base_type::__base_type; \ using __base_type::operator=; \ \ - __copy_assignment(const __copy_assignment&) = default; \ - __copy_assignment(__copy_assignment&&) = default; \ - ~__copy_assignment() = default; 
\ - copy_assignment __copy_assignment& operator=(__copy_assignment&&) = default; \ + __copy_assignment(const __copy_assignment&) = default; \ + __copy_assignment(__copy_assignment&&) = default; \ + ~__copy_assignment() = default; \ + __copy_assignment& operator=(__copy_assignment&&) = default; \ + copy_assignment; \ } _LIBCPP_VARIANT_COPY_ASSIGNMENT(_Trait::_TriviallyAvailable, - __copy_assignment& operator=(const __copy_assignment& __that) = default;); + __copy_assignment& operator=(const __copy_assignment& __that) = default); _LIBCPP_VARIANT_COPY_ASSIGNMENT( - _Trait::_Available, __copy_assignment& operator=(const __copy_assignment& __that) { + _Trait::_Available, _LIBCPP_HIDE_FROM_ABI __copy_assignment& operator=(const __copy_assignment& __that) { this->__generic_assign(__that); return *this; - }); + } _LIBCPP_EAT_SEMICOLON); -_LIBCPP_VARIANT_COPY_ASSIGNMENT(_Trait::_Unavailable, __copy_assignment& operator=(const __copy_assignment&) = delete;); +_LIBCPP_VARIANT_COPY_ASSIGNMENT(_Trait::_Unavailable, __copy_assignment& operator=(const __copy_assignment&) = delete); # undef _LIBCPP_VARIANT_COPY_ASSIGNMENT diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp index 7350c1ddf0e901..4119c39772e564 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_add.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // https://github.com/llvm/llvm-project/issues/72893 diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp index 
84dcde5f2784f2..2460765a3c860c 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/fetch_sub.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // https://github.com/llvm/llvm-project/issues/72893 diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp index 386a393e355039..4bd303022c0dad 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.minus_equals.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // floating-point-type operator-=(floating-point-type) volatile noexcept; diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp index afd06d537c7adb..69abb9ae63c38b 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/operator.plus_equals.pass.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: LIBCXX-AIX-FIXME // XFAIL: !has-64-bit-atomics // floating-point-type operator+=(floating-point-type) volatile noexcept; diff --git 
a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp index 58067511950703..b458f93601a1ba 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp @@ -57,18 +57,20 @@ int main(int, char**) test t2 = t; } { - char g1, g2, g3, p1, p3; + char g[3]; + char p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2 = t; } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { - wchar_t g1, g2, g3, p1, p3; + wchar_t g[3]; + wchar_t p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2 = t; } { diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp index 8a976e77f0f13f..45a8cdf3a23fea 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp @@ -59,10 +59,11 @@ int main(int, char**) t2 = t; } { - char g1, g2, g3, p1, p3; + char g[3]; + char p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2 = t; } @@ -73,10 +74,11 @@ int main(int, char**) t2 = t; } { - wchar_t g1, g2, g3, p1, p3; + wchar_t g[3]; + wchar_t p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2 = t; } diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp 
b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp index c575c2cb12711a..b90c4c053c9155 100644 --- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp @@ -68,10 +68,11 @@ int main(int, char**) t2.swap(t); } { - char g1, g2, g3, p1, p3; + char g[3]; + char p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2.swap(t); } @@ -82,10 +83,11 @@ int main(int, char**) t2.swap(t); } { - wchar_t g1, g2, g3, p1, p3; + wchar_t g[3]; + wchar_t p[3]; test t; - t.setg(&g1, &g2, &g3); - t.setp(&p1, &p3); + t.setg(&g[0], &g[1], &g[2]); + t.setp(&p[0], &p[2]); test t2; t2.swap(t); } diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.assert.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.assert.pass.cpp new file mode 100644 index 00000000000000..becf89b12fdd18 --- /dev/null +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.assert.pass.cpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// UNSUPPORTED: libcpp-hardening-mode=none +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +// + +// template > +// class basic_streambuf; + +// void setg(char_type* gbeg, char_type* gnext, char_type* gend); + +#include +#include +#include +#include + +#include "check_assertion.h" +#include "make_string.h" +#include "test_macros.h" + +template +struct streambuf : public std::basic_streambuf { + typedef std::basic_streambuf base; + + streambuf() {} + + void setg(CharT* gbeg, CharT* gnext, CharT* gend) { base::setg(gbeg, gnext, gend); } +}; + +template +void test() { + std::basic_string str = MAKE_STRING(CharT, "ABCDEF"); + CharT arr[6]; + std::copy(str.begin(), str.end(), arr); + + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE( + buff.setg(std::begin(arr) + 1, std::begin(arr), std::end(arr)), "[gbeg, gnext) must be a valid range"); + } + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE( + buff.setg(std::begin(arr) + 1, std::begin(arr) + 1, std::begin(arr)), "[gbeg, gend) must be a valid range"); + } + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE( + buff.setg(std::begin(arr), std::begin(arr) + 3, std::begin(arr) + 2), "[gnext, gend) must be a valid range"); + } +} + +int main(int, char**) { + test(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + + return 0; +} diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.assert.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.assert.pass.cpp new file mode 100644 index 00000000000000..abd42272de508c --- /dev/null +++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.assert.pass.cpp @@ -0,0 +1,57 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// UNSUPPORTED: libcpp-hardening-mode=none +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +// + +// template > +// class basic_streambuf; + +// void setp(char_type* pbeg, char_type* pend); + +#include +#include +#include +#include + +#include "check_assertion.h" +#include "make_string.h" +#include "test_macros.h" + +template +struct streambuf : public std::basic_streambuf { + typedef std::basic_streambuf base; + + streambuf() {} + + void setp(CharT* pbeg, CharT* pend) { base::setp(pbeg, pend); } +}; + +template +void test() { + std::basic_string str = MAKE_STRING(CharT, "ABCDEF"); + CharT arr[6]; + std::copy(str.begin(), str.end(), arr); + + { + streambuf buff; + TEST_LIBCPP_ASSERT_FAILURE(buff.setp(std::begin(arr) + 3, std::begin(arr)), "[pbeg, pend) must be a valid range"); + } +} + +int main(int, char**) { + test(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + + return 0; +} diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index bf5c0a51f944ae..96c1e2664f7a64 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -223,7 +223,7 @@ void test_char() { static_assert(sizeof(CharT) == 4, "add support for unexpected size"); // Unicode fitting in a 32-bit wchar_t - constexpr wchar_t x = 0x1ffff; + constexpr wchar_t x = 0x1ffff; constexpr std::uint32_t y = 0x1ffff; 
static_assert(x == y); @@ -290,7 +290,7 @@ void test_string() { test_format(SV("[\"\ud7ff\"]"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid #else /* U+D800..D+DFFFF surrogate range */ - test_format(SV(R"(["\u{d7ff}"])"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid + test_format(SV(R"(["\u{d7ff}"])"), SV("[{:?}]"), "\xed\x9f\xbf"); // U+D7FF last valid #endif test_format(SV(R"(["\x{ed}\x{a0}\x{80}"])"), SV("[{:?}]"), "\xed\xa0\x80"); // U+D800 test_format(SV(R"(["\x{ed}\x{af}\x{bf}"])"), SV("[{:?}]"), "\xed\xaf\xbf"); // U+DBFF @@ -319,7 +319,8 @@ void test_string() { test_format(SV("[\"\u00c3(\"]"), SV("[{:?}]"), L"\xc3\x28"); } - test_format(SV(R"(["🤷🏻\u{200d}♂\u{fe0f}"])"), SV("[{:?}]"), SV("🤷🏻‍♂️")); + // LWG-3965 + test_format(SV(R"(["🤷🏻\u{200d}♂️"])"), SV("[{:?}]"), SV("🤷🏻‍♂️")); // *** Special cases *** test_format(SV(R"("\t\n\r\\'\" ")"), SV("{:?}"), SV("\t\n\r\\'\" ")); @@ -336,6 +337,11 @@ void test_string() { if constexpr (sizeof(CharT) == 1) test_format(SV(R"("\x{80}")"), SV("{:?}"), SV("\x80")); + // *** P2713R1 examples *** + test_format(SV(R"(["\u{301}"])"), SV("[{:?}]"), SV("\u0301")); + test_format(SV(R"(["\\\u{301}"])"), SV("[{:?}]"), SV("\\\u0301")); + test_format(SV(R"(["ẹ́"])"), SV("[{:?}]"), SV("e\u0301\u0323")); + #ifndef TEST_HAS_NO_WIDE_CHARACTERS if constexpr (sizeof(CharT) > 1) { using V = std::basic_string_view; @@ -373,7 +379,7 @@ void test_string() { static_assert(sizeof(CharT) == 4, "add support for unexpected size"); // Unicode fitting in a 32-bit wchar_t - constexpr wchar_t x = 0x1ffff; + constexpr wchar_t x = 0x1ffff; constexpr std::uint32_t y = 0x1ffff; static_assert(x == y); @@ -406,20 +412,18 @@ void test_format_functions(TestFunction check) { check(SV(R"(*"hellö"**)"), SV("{:*^10?}"), SV("hellö")); check(SV(R"("hellö"***)"), SV("{:*<10?}"), SV("hellö")); - check(SV(R"("hello\u{308}")"), SV("{:*>10?}"), SV("hello\u0308")); - check(SV(R"(***"hello\u{308}")"), SV("{:*>17?}"), SV("hello\u0308")); - 
check(SV(R"(*"hello\u{308}"**)"), SV("{:*^17?}"), SV("hello\u0308")); - check(SV(R"("hello\u{308}"***)"), SV("{:*<17?}"), SV("hello\u0308")); + check(SV(R"(***"hellö")"), SV("{:*>10?}"), SV("hello\u0308")); + check(SV(R"(*"hellö"**)"), SV("{:*^10?}"), SV("hello\u0308")); + check(SV(R"("hellö"***)"), SV("{:*<10?}"), SV("hello\u0308")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}")"), SV("{:*>10?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"(***"hello 🤷🏻\u{200d}♂\u{fe0f}")"), SV("{:*>30?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"(*"hello 🤷🏻\u{200d}♂\u{fe0f}"**)"), SV("{:*^30?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}"***)"), SV("{:*<30?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"(***"hello 🤷🏻\u{200d}♂️")"), SV("{:*>22?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"(*"hello 🤷🏻\u{200d}♂️"**)"), SV("{:*^22?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️"***)"), SV("{:*<22?}"), SV("hello 🤷🏻‍♂️")); // *** width *** check(SV(R"("hellö" )"), SV("{:10?}"), SV("hellö")); - check(SV(R"("hello\u{308}" )"), SV("{:17?}"), SV("hello\u0308")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}" )"), SV("{:30?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hellö" )"), SV("{:10?}"), SV("hello\u0308")); + check(SV(R"("hello 🤷🏻\u{200d}♂️" )"), SV("{:22?}"), SV("hello 🤷🏻‍♂️")); // *** precision *** check(SV(R"("hell)"), SV("{:.5?}"), SV("hellö")); @@ -431,9 +435,8 @@ void test_format_functions(TestFunction check) { check(SV(R"("hello 🤷🏻)"), SV("{:.9?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\)"), SV("{:.10?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\u{200d})"), SV("{:.17?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂)"), SV("{:.18?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\)"), SV("{:.19?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}")"), SV("{:.28?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️)"), SV("{:.18?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️")"), SV("{:.19?}"), SV("hello 🤷🏻‍♂️")); // *** width & precision *** 
check(SV(R"("hell#########################)"), SV("{:#<30.5?}"), SV("hellö")); @@ -445,9 +448,8 @@ void test_format_functions(TestFunction check) { check(SV(R"("hello 🤷🏻#####################)"), SV("{:#<30.9?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\####################)"), SV("{:#<30.10?}"), SV("hello 🤷🏻‍♂️")); check(SV(R"("hello 🤷🏻\u{200d}#############)"), SV("{:#<30.17?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂############)"), SV("{:#<30.18?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\###########)"), SV("{:#<30.19?}"), SV("hello 🤷🏻‍♂️")); - check(SV(R"("hello 🤷🏻\u{200d}♂\u{fe0f}"###)"), SV("{:#<30.28?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️############)"), SV("{:#<30.18?}"), SV("hello 🤷🏻‍♂️")); + check(SV(R"("hello 🤷🏻\u{200d}♂️"###########)"), SV("{:#<30.19?}"), SV("hello 🤷🏻‍♂️")); } template diff --git a/libcxx/utils/generate_escaped_output_table.py b/libcxx/utils/generate_escaped_output_table.py index c6bde8f2411cf6..a11ce259096d5a 100755 --- a/libcxx/utils/generate_escaped_output_table.py +++ b/libcxx/utils/generate_escaped_output_table.py @@ -39,12 +39,6 @@ class Entry: ) -def filterCoreProperty(element: PropertyRange) -> Optional[PropertyRange]: - if element.prop == "Grapheme_Extend": - return element - return None - - # https://www.unicode.org/reports/tr44/#GC_Values_Table def filterGeneralProperty(element: PropertyRange) -> Optional[PropertyRange]: if element.prop in ["Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn"]: @@ -94,10 +88,9 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: /// The entries of the characters to escape in format's debug string. 
/// /// Contains the entries for [format.string.escaped]/2.2.1.2.1 -/// CE is a Unicode encoding and C corresponds to either a UCS scalar value -/// whose Unicode property General_Category has a value in the groups -/// Separator (Z) or Other (C) or to a UCS scalar value which has the Unicode -/// property Grapheme_Extend=Yes, as described by table 12 of UAX #44 +/// CE is a Unicode encoding and C corresponds to a UCS scalar value whose +/// Unicode property General_Category has a value in the groups Separator (Z) +/// or Other (C), as described by table 12 of UAX #44 /// /// Separator (Z) consists of General_Category /// - Space_Separator, @@ -112,7 +105,6 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: /// - Unassigned. /// /// The data is generated from -/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// - https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt /// /// The table is similar to the table @@ -132,7 +124,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: /// lookup table would add 446 entries to the table (in Unicode 14). /// Instead the only the start of the region is stored, every code point in /// this region needs to be escaped. -inline constexpr uint32_t __unallocated_region_lower_bound = 0x{unallocated:08x}; +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __unallocated_region_lower_bound = 0x{unallocated:08x}; /// Returns whether the code unit needs to be escaped. 
/// @@ -262,7 +254,7 @@ def property_ranges_to_table(ranges: list[PropertyRange]) -> list[Entry]: return result -cpp_entrytemplate = " 0x{:08x}" +cpp_entrytemplate = " 0x{:08x} /* {:08x} - {:08x} [{:>5}] */" def generate_cpp_data(ranges: list[PropertyRange], unallocated: int) -> str: @@ -272,7 +264,15 @@ def generate_cpp_data(ranges: list[PropertyRange], unallocated: int) -> str: DATA_ARRAY_TEMPLATE.format( size=len(table), entries=",\n".join( - [cpp_entrytemplate.format(x.lower << 11 | x.offset) for x in table] + [ + cpp_entrytemplate.format( + x.lower << 11 | x.offset, + x.lower, + x.lower + x.offset, + x.offset + 1, + ) + for x in table + ] ), unallocated=unallocated, ) @@ -291,12 +291,6 @@ def generate_data_tables() -> str: / "unicode" / "DerivedGeneralCategory.txt" ) - derived_core_catagory_path = ( - Path(__file__).absolute().parent - / "data" - / "unicode" - / "DerivedCoreProperties.txt" - ) properties = list() with derived_general_catagory_path.open(encoding="utf-8") as f: @@ -308,15 +302,6 @@ def generate_data_tables() -> str: ) ) ) - with derived_core_catagory_path.open(encoding="utf-8") as f: - properties.extend( - list( - filter( - filterCoreProperty, - [x for line in f if (x := parsePropertyLine(line))], - ) - ) - ) data = compactPropertyRanges(sorted(properties, key=lambda x: x.lower)) @@ -328,8 +313,12 @@ def generate_data_tables() -> str: # # When this region becomes substantially smaller we need to investigate # this design. + # + # Due to P2713R1 Escaping improvements in std::format the range + # E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + # is no longer part of these entries. This causes an increase in the size + # of the table. 
assert data[-1].upper == 0x10FFFF - assert data[-1].upper - data[-1].lower > 900000 return "\n".join([generate_cpp_data(data[:-1], data[-1].lower)]) diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 20088d92bafa2f..7b9c9c6c6c3852 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -1084,10 +1084,62 @@ static void mergeArch(RISCVISAInfo::OrderedExtensionMap &mergedExts, } } +static void mergeAtomic(DenseMap::iterator it, + const InputSectionBase *oldSection, + const InputSectionBase *newSection, unsigned int oldTag, + unsigned int newTag) { + using RISCVAttrs::RISCVAtomicAbiTag::AtomicABI; + // Same tags stay the same, and UNKNOWN is compatible with anything + if (oldTag == newTag || newTag == AtomicABI::UNKNOWN) + return; + + switch (oldTag) { + case AtomicABI::UNKNOWN: + it->getSecond() = newTag; + return; + case AtomicABI::A6C: + switch (newTag) { + case AtomicABI::A6S: + it->getSecond() = AtomicABI::A6C; + return; + case AtomicABI::A7: + error(toString(oldSection) + " has atomic_abi=" + Twine(oldTag) + + " but " + toString(newSection) + + " has atomic_abi=" + Twine(newTag)); + return; + }; + + case AtomicABI::A6S: + switch (newTag) { + case AtomicABI::A6C: + it->getSecond() = AtomicABI::A6C; + return; + case AtomicABI::A7: + it->getSecond() = AtomicABI::A7; + return; + }; + + case AtomicABI::A7: + switch (newTag) { + case AtomicABI::A6S: + it->getSecond() = AtomicABI::A7; + return; + case AtomicABI::A6C: + error(toString(oldSection) + " has atomic_abi=" + Twine(oldTag) + + " but " + toString(newSection) + + " has atomic_abi=" + Twine(newTag)); + return; + }; + default: + llvm_unreachable("unknown AtomicABI"); + }; +} + static RISCVAttributesSection * mergeAttributesSection(const SmallVector §ions) { RISCVISAInfo::OrderedExtensionMap exts; const InputSectionBase *firstStackAlign = nullptr; + const InputSectionBase *firstAtomicAbi = nullptr; unsigned firstStackAlignValue = 0, xlen = 0; bool hasArch = false; @@ -1134,6 +1186,17 @@ 
mergeAttributesSection(const SmallVector §ions) { case RISCVAttrs::PRIV_SPEC_MINOR: case RISCVAttrs::PRIV_SPEC_REVISION: break; + + case llvm::RISCVAttrs::AttrType::ATOMIC_ABI: + if (auto i = parser.getAttributeValue(tag.attr)) { + auto r = merged.intAttr.try_emplace(tag.attr, *i); + if (r.second) { + firstAtomicAbi = sec; + } else { + mergeAtomic(r.first, firstAtomicAbi, sec, r.first->getSecond(), *i); + } + } + continue; } // Fallback for deprecated priv_spec* and other unknown attributes: retain diff --git a/lld/test/ELF/mips-eh_frame-pic.s b/lld/test/ELF/mips-eh_frame-pic.s index c04dbdf57b08ad..a84c36b0e5ecdb 100644 --- a/lld/test/ELF/mips-eh_frame-pic.s +++ b/lld/test/ELF/mips-eh_frame-pic.s @@ -36,8 +36,8 @@ # RELOCS: .rel{{a?}}.eh_frame { # ABS32-RELOCS-NEXT: 0x1C R_MIPS_32 .text # ABS64-RELOCS-NEXT: 0x1C R_MIPS_64/R_MIPS_NONE/R_MIPS_NONE .text -# PIC64-RELOCS-NEXT: 0x1C R_MIPS_PC32/R_MIPS_NONE/R_MIPS_NONE .L0 -# PIC32-RELOCS-NEXT: 0x1C R_MIPS_PC32 .L0 +# PIC64-RELOCS-NEXT: 0x1C R_MIPS_PC32/R_MIPS_NONE/R_MIPS_NONE +# PIC32-RELOCS-NEXT: 0x1C R_MIPS_PC32 # RELOCS-NEXT: } # ABS64-EH-FRAME: Augmentation data: 0C diff --git a/lld/test/ELF/riscv-attributes.s b/lld/test/ELF/riscv-attributes.s index d0ce0941269ec4..77c2c3cb263fd1 100644 --- a/lld/test/ELF/riscv-attributes.s +++ b/lld/test/ELF/riscv-attributes.s @@ -44,6 +44,39 @@ # RUN: not ld.lld a.o b.o c.o diff_stack_align.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=STACK_ALIGN --implicit-check-not=error: # STACK_ALIGN: error: diff_stack_align.o:(.riscv.attributes) has stack_align=32 but a.o:(.riscv.attributes) has stack_align=16 +## merging atomic_abi values for A6C and A7 lead to an error. 
+# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A6C.s -o atomic_abi_A6C.o +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A7.s -o atomic_abi_A7.o +# RUN: not ld.lld atomic_abi_A6C.o atomic_abi_A7.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ATOMIC_ABI_ERROR --implicit-check-not=error: +# ATOMIC_ABI_ERROR: error: atomic_abi_A6C.o:(.riscv.attributes) has atomic_abi=1 but atomic_abi_A7.o:(.riscv.attributes) has atomic_abi=3 + + +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_A6S.s -o atomic_abi_A6S.o +# RUN: ld.lld atomic_abi_A6S.o atomic_abi_A6C.o -o atomic_abi_A6C_A6S +# RUN: llvm-readobj -A atomic_abi_A6C_A6S | FileCheck %s --check-prefix=A6C_A6S + +# RUN: ld.lld atomic_abi_A6S.o atomic_abi_A7.o -o atomic_abi_A6S_A7 +# RUN: llvm-readobj -A atomic_abi_A6S_A7 | FileCheck %s --check-prefix=A6S_A7 + +# RUN: llvm-mc -filetype=obj -triple=riscv64 atomic_abi_unknown.s -o atomic_abi_unknown.o +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A6C.o -o atomic_abi_A6C_unknown +# RUN: llvm-readobj -A atomic_abi_A6C_unknown | FileCheck %s --check-prefixes=UNKNOWN_A6C + +# RUN: ld.lld atomic_abi_unknown.o diff_stack_align.o -o atomic_abi_none_unknown +# RUN: llvm-readobj -A atomic_abi_none_unknown | FileCheck %s --check-prefixes=UNKNOWN_NONE + +# RUN: ld.lld diff_stack_align.o atomic_abi_A6C.o -o atomic_abi_A6C_none +# RUN: llvm-readobj -A atomic_abi_A6C_none | FileCheck %s --check-prefixes=NONE_A6C + +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A6S.o -o atomic_abi_A6S_unknown +# RUN: llvm-readobj -A atomic_abi_A6S_unknown | FileCheck %s --check-prefix=UNKNOWN_A6S + +# RUN: ld.lld atomic_abi_unknown.o atomic_abi_A7.o -o atomic_abi_A7_unknown +# RUN: llvm-readobj -A atomic_abi_A7_unknown | FileCheck %s --check-prefix=UNKNOWN_A7 + +# RUN: ld.lld diff_stack_align.o atomic_abi_A7.o -o atomic_abi_A7_none +# RUN: llvm-readobj -A atomic_abi_A7_none | FileCheck %s --check-prefix=NONE_A7 + ## The deprecated priv_spec is not handled as GNU ld does. 
## Differing priv_spec attributes lead to an absent attribute. # RUN: llvm-mc -filetype=obj -triple=riscv64 diff_priv_spec.s -o diff_priv_spec.o @@ -286,6 +319,175 @@ .attribute priv_spec, 3 .attribute priv_spec_minor, 3 +#--- atomic_abi_unknown.s +.attribute atomic_abi, 0 + +#--- atomic_abi_A6C.s +.attribute atomic_abi, 1 + +#--- atomic_abi_A6S.s +.attribute atomic_abi, 2 + +#--- atomic_abi_A7.s +.attribute atomic_abi, 3 + +# UNKNOWN_NONE: BuildAttributes { +# UNKNOWN_NONE-NEXT: FormatVersion: 0x41 +# UNKNOWN_NONE-NEXT: Section 1 { +# UNKNOWN_NONE-NEXT: SectionLength: 17 +# UNKNOWN_NONE-NEXT: Vendor: riscv +# UNKNOWN_NONE-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_NONE-NEXT: Size: 7 +# UNKNOWN_NONE-NEXT: FileAttributes { +# UNKNOWN_NONE-NEXT: Attribute { +# UNKNOWN_NONE-NEXT: Tag: 4 +# UNKNOWN_NONE-NEXT: Value: 32 +# UNKNOWN_NONE-NEXT: TagName: stack_align +# UNKNOWN_NONE-NEXT: Description: Stack alignment is 32-bytes +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } +# UNKNOWN_NONE-NEXT: } + +# NONE_A6C: BuildAttributes { +# NONE_A6C-NEXT: FormatVersion: 0x41 +# NONE_A6C-NEXT: Section 1 { +# NONE_A6C-NEXT: SectionLength: 19 +# NONE_A6C-NEXT: Vendor: riscv +# NONE_A6C-NEXT: Tag: Tag_File (0x1) +# NONE_A6C-NEXT: Size: 9 +# NONE_A6C-NEXT: FileAttributes { +# NONE_A6C-NEXT: Attribute { +# NONE_A6C-NEXT: Tag: 14 +# NONE_A6C-NEXT: Value: 1 +# NONE_A6C-NEXT: TagName: atomic_abi +# NONE_A6C-NEXT: Description: Atomic ABI is 1 +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: Attribute { +# NONE_A6C-NEXT: Tag: 4 +# NONE_A6C-NEXT: Value: 32 +# NONE_A6C-NEXT: TagName: stack_align +# NONE_A6C-NEXT: Description: Stack alignment is 32-bytes +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } +# NONE_A6C-NEXT: } + +# UNKNOWN_A6C: BuildAttributes { +# UNKNOWN_A6C-NEXT: FormatVersion: 0x41 +# UNKNOWN_A6C-NEXT: Section 1 { +# UNKNOWN_A6C-NEXT: SectionLength: 17 +# UNKNOWN_A6C-NEXT: Vendor: riscv +# UNKNOWN_A6C-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A6C-NEXT: Size: 7 +# 
UNKNOWN_A6C-NEXT: FileAttributes { +# UNKNOWN_A6C-NEXT: Attribute { +# UNKNOWN_A6C-NEXT: Tag: 14 +# UNKNOWN_A6C-NEXT: Value: 1 +# UNKNOWN_A6C-NEXT: TagName: atomic_abi +# UNKNOWN_A6C-NEXT: Description: Atomic ABI is 1 +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } +# UNKNOWN_A6C-NEXT: } + +# UNKNOWN_A6S: BuildAttributes { +# UNKNOWN_A6S-NEXT: FormatVersion: 0x41 +# UNKNOWN_A6S-NEXT: Section 1 { +# UNKNOWN_A6S-NEXT: SectionLength: +# UNKNOWN_A6S-NEXT: Vendor: riscv +# UNKNOWN_A6S-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A6S-NEXT: Size: 7 +# UNKNOWN_A6S-NEXT: FileAttributes { +# UNKNOWN_A6S-NEXT: Attribute { +# UNKNOWN_A6S-NEXT: Tag: 14 +# UNKNOWN_A6S-NEXT: Value: 2 +# UNKNOWN_A6S-NEXT: TagName: atomic_abi +# UNKNOWN_A6S-NEXT: Description: Atomic ABI is 2 +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } +# UNKNOWN_A6S-NEXT: } + +# NONE_A7: BuildAttributes { +# NONE_A7-NEXT: FormatVersion: 0x41 +# NONE_A7-NEXT: Section 1 { +# NONE_A7-NEXT: SectionLength: 19 +# NONE_A7-NEXT: Vendor: riscv +# NONE_A7-NEXT: Tag: Tag_File (0x1) +# NONE_A7-NEXT: Size: 9 +# NONE_A7-NEXT: FileAttributes { +# NONE_A7-NEXT: Attribute { +# NONE_A7-NEXT: Tag: 14 +# NONE_A7-NEXT: Value: 3 +# NONE_A7-NEXT: TagName: atomic_abi +# NONE_A7-NEXT: Description: Atomic ABI is 3 +# NONE_A7-NEXT: } +# NONE_A7-NEXT: Attribute { +# NONE_A7-NEXT: Tag: 4 +# NONE_A7-NEXT: Value: 32 +# NONE_A7-NEXT: TagName: stack_align +# NONE_A7-NEXT: Description: Stack alignment is 32-bytes +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } +# NONE_A7-NEXT: } + + +# UNKNOWN_A7: BuildAttributes { +# UNKNOWN_A7-NEXT: FormatVersion: 0x41 +# UNKNOWN_A7-NEXT: Section 1 { +# UNKNOWN_A7-NEXT: SectionLength: 17 +# UNKNOWN_A7-NEXT: Vendor: riscv +# UNKNOWN_A7-NEXT: Tag: Tag_File (0x1) +# UNKNOWN_A7-NEXT: Size: 7 +# UNKNOWN_A7-NEXT: FileAttributes { +# UNKNOWN_A7-NEXT: Attribute { +# UNKNOWN_A7-NEXT: Tag: 14 +# UNKNOWN_A7-NEXT: Value: 3 +# UNKNOWN_A7-NEXT: TagName: atomic_abi +# 
UNKNOWN_A7-NEXT: Description: Atomic ABI is 3 +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } +# UNKNOWN_A7-NEXT: } + +# A6C_A6S: BuildAttributes { +# A6C_A6S-NEXT: FormatVersion: 0x41 +# A6C_A6S-NEXT: Section 1 { +# A6C_A6S-NEXT: SectionLength: 17 +# A6C_A6S-NEXT: Vendor: riscv +# A6C_A6S-NEXT: Tag: Tag_File (0x1) +# A6C_A6S-NEXT: Size: 7 +# A6C_A6S-NEXT: FileAttributes { +# A6C_A6S-NEXT: Attribute { +# A6C_A6S-NEXT: Tag: 14 +# A6C_A6S-NEXT: Value: 1 +# A6C_A6S-NEXT: TagName: atomic_abi +# A6C_A6S-NEXT: Description: Atomic ABI is 1 +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } +# A6C_A6S-NEXT: } + +# A6S_A7: BuildAttributes { +# A6S_A7-NEXT: FormatVersion: 0x41 +# A6S_A7-NEXT: Section 1 { +# A6S_A7-NEXT: SectionLength: 17 +# A6S_A7-NEXT: Vendor: riscv +# A6S_A7-NEXT: Tag: Tag_File (0x1) +# A6S_A7-NEXT: Size: 7 +# A6S_A7-NEXT: FileAttributes { +# A6S_A7-NEXT: Attribute { +# A6S_A7-NEXT: Tag: 14 +# A6S_A7-NEXT: Value: 3 +# A6S_A7-NEXT: TagName: atomic_abi +# A6S_A7-NEXT: Description: Atomic ABI is 3 +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } +# A6S_A7-NEXT: } + #--- unknown13.s .attribute 13, "0" #--- unknown13a.s diff --git a/lldb/bindings/interface/SBValueDocstrings.i b/lldb/bindings/interface/SBValueDocstrings.i index 6bab923e8b35a6..59fa807f5ec95c 100644 --- a/lldb/bindings/interface/SBValueDocstrings.i +++ b/lldb/bindings/interface/SBValueDocstrings.i @@ -135,6 +135,26 @@ linked list." %feature("docstring", "Expands nested expressions like .a->b[0].c[1]->d." ) lldb::SBValue::GetValueForExpressionPath; +%feature("docstring", " + Return the value as an address. On failure, LLDB_INVALID_ADDRESS + will be returned. On architectures like AArch64, where the + top (unaddressable) bits can be used for authentication, + memory tagging, or top byte ignore, this method will return + the value with those top bits cleared. 
+ + GetValueAsUnsigned returns the actual value, with the + authentication/Top Byte Ignore/Memory Tagging Extension bits. + + Calling this on a random value which is not a pointer is + incorrect. Call GetType().IsPointerType() if in doubt. + + An SB API program may want to show both the literal byte value + and the address it refers to in memory. These two SBValue + methods allow SB API writers to behave appropriately for their + interface." +) lldb::SBValue::GetValueAsAddress; + + %feature("doctstring", " Returns the number for children. diff --git a/lldb/include/lldb/API/SBLineEntry.h b/lldb/include/lldb/API/SBLineEntry.h index 7c2431ba3c8a51..d70c4fac6ec717 100644 --- a/lldb/include/lldb/API/SBLineEntry.h +++ b/lldb/include/lldb/API/SBLineEntry.h @@ -29,6 +29,9 @@ class LLDB_API SBLineEntry { lldb::SBAddress GetEndAddress() const; + lldb::SBAddress + GetSameLineContiguousAddressRangeEnd(bool include_inlined_functions) const; + explicit operator bool() const; bool IsValid() const; diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 7da3335a7234b7..f1b5d1fb92ce29 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -562,6 +562,8 @@ class LLDB_API SBProcess { lldb::SBScriptObject GetScriptedImplementation(); + void GetStatus(SBStream &status); + protected: friend class SBAddress; friend class SBBreakpoint; diff --git a/lldb/include/lldb/API/SBSymbolContextList.h b/lldb/include/lldb/API/SBSymbolContextList.h index 4026afc213571c..95100d219df20f 100644 --- a/lldb/include/lldb/API/SBSymbolContextList.h +++ b/lldb/include/lldb/API/SBSymbolContextList.h @@ -44,6 +44,7 @@ class LLDB_API SBSymbolContextList { protected: friend class SBModule; friend class SBTarget; + friend class SBCompileUnit; lldb_private::SymbolContextList *operator->() const; diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index 823615e6a36df5..feeaa1cb71132b 100644 --- 
a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -879,6 +879,10 @@ class LLDB_API SBTarget { uint32_t count, const char *flavor_string); + lldb::SBInstructionList ReadInstructions(lldb::SBAddress start_addr, + lldb::SBAddress end_addr, + const char *flavor_string); + lldb::SBInstructionList GetInstructions(lldb::SBAddress base_addr, const void *buf, size_t size); diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h index 67f55ce7da2877..8f4c4fd56dfb18 100644 --- a/lldb/include/lldb/API/SBValue.h +++ b/lldb/include/lldb/API/SBValue.h @@ -68,6 +68,8 @@ class LLDB_API SBValue { uint64_t GetValueAsUnsigned(uint64_t fail_value = 0); + lldb::addr_t GetValueAsAddress(); + ValueType GetValueType(); // If you call this on a newly created ValueObject, it will always return diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 27a76a652f4063..5838281bcb1a10 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -811,23 +811,34 @@ def request_next(self, threadId): command_dict = {"command": "next", "type": "request", "arguments": args_dict} return self.send_recv(command_dict) - def request_stepIn(self, threadId): + def request_stepIn(self, threadId, targetId): if self.exit_status is not None: - raise ValueError("request_continue called after process exited") - args_dict = {"threadId": threadId} + raise ValueError("request_stepIn called after process exited") + args_dict = {"threadId": threadId, "targetId": targetId} command_dict = {"command": "stepIn", "type": "request", "arguments": args_dict} return self.send_recv(command_dict) + def request_stepInTargets(self, frameId): + if self.exit_status is not None: + raise ValueError("request_stepInTargets called after process exited") + args_dict = {"frameId": frameId} + command_dict = { + 
"command": "stepInTargets", + "type": "request", + "arguments": args_dict, + } + return self.send_recv(command_dict) + def request_stepOut(self, threadId): if self.exit_status is not None: - raise ValueError("request_continue called after process exited") + raise ValueError("request_stepOut called after process exited") args_dict = {"threadId": threadId} command_dict = {"command": "stepOut", "type": "request", "arguments": args_dict} return self.send_recv(command_dict) def request_pause(self, threadId=None): if self.exit_status is not None: - raise ValueError("request_continue called after process exited") + raise ValueError("request_pause called after process exited") if threadId is None: threadId = self.get_thread_id() args_dict = {"threadId": threadId} diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 23f650d2d36fdd..d56ea5dca14beb 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -218,8 +218,8 @@ def set_global(self, name, value, id=None): """Set a top level global variable only.""" return self.dap_server.request_setVariable(2, name, str(value), id=id) - def stepIn(self, threadId=None, waitForStop=True): - self.dap_server.request_stepIn(threadId=threadId) + def stepIn(self, threadId=None, targetId=None, waitForStop=True): + self.dap_server.request_stepIn(threadId=threadId, targetId=targetId) if waitForStop: return self.dap_server.wait_for_stopped() return None diff --git a/lldb/source/API/SBLineEntry.cpp b/lldb/source/API/SBLineEntry.cpp index 99a7b8fe644cb5..216ea6d18eab89 100644 --- a/lldb/source/API/SBLineEntry.cpp +++ b/lldb/source/API/SBLineEntry.cpp @@ -67,6 +67,21 @@ SBAddress SBLineEntry::GetEndAddress() const { return sb_address; } +SBAddress SBLineEntry::GetSameLineContiguousAddressRangeEnd( + bool include_inlined_functions) 
const { + LLDB_INSTRUMENT_VA(this); + + SBAddress sb_address; + if (m_opaque_up) { + AddressRange line_range = m_opaque_up->GetSameLineContiguousAddressRange( + include_inlined_functions); + + sb_address.SetAddress(line_range.GetBaseAddress()); + sb_address.OffsetAddress(line_range.GetByteSize()); + } + return sb_address; +} + bool SBLineEntry::IsValid() const { LLDB_INSTRUMENT_VA(this); return this->operator bool(); diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp index c73348fde3f74d..c37c111c5a58e0 100644 --- a/lldb/source/API/SBProcess.cpp +++ b/lldb/source/API/SBProcess.cpp @@ -928,6 +928,14 @@ size_t SBProcess::WriteMemory(addr_t addr, const void *src, size_t src_len, return bytes_written; } +void SBProcess::GetStatus(SBStream &status) { + LLDB_INSTRUMENT_VA(this, status); + + ProcessSP process_sp(GetSP()); + if (process_sp) + process_sp->GetStatus(status.ref()); +} + bool SBProcess::GetDescription(SBStream &description) { LLDB_INSTRUMENT_VA(this, description); diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 75f0444f629114..962ce9ba83cc77 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -2011,6 +2011,30 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr, return sb_instructions; } +lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress start_addr, + lldb::SBAddress end_addr, + const char *flavor_string) { + LLDB_INSTRUMENT_VA(this, start_addr, end_addr, flavor_string); + + SBInstructionList sb_instructions; + + TargetSP target_sp(GetSP()); + if (target_sp) { + lldb::addr_t start_load_addr = start_addr.GetLoadAddress(*this); + lldb::addr_t end_load_addr = end_addr.GetLoadAddress(*this); + if (end_load_addr > start_load_addr) { + lldb::addr_t size = end_load_addr - start_load_addr; + + AddressRange range(start_load_addr, size); + const bool force_live_memory = true; + sb_instructions.SetDisassembler(Disassembler::DisassembleRange( + 
target_sp->GetArchitecture(), nullptr, flavor_string, *target_sp, + range, force_live_memory)); + } + } + return sb_instructions; +} + lldb::SBInstructionList SBTarget::GetInstructions(lldb::SBAddress base_addr, const void *buf, size_t size) { diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index 94a8f3ea319e89..c53ec5a7464829 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -909,6 +909,25 @@ uint64_t SBValue::GetValueAsUnsigned(uint64_t fail_value) { return fail_value; } +lldb::addr_t SBValue::GetValueAsAddress() { + addr_t fail_value = LLDB_INVALID_ADDRESS; + ValueLocker locker; + lldb::ValueObjectSP value_sp(GetSP(locker)); + if (value_sp) { + bool success = true; + uint64_t ret_val = fail_value; + ret_val = value_sp->GetValueAsUnsigned(fail_value, &success); + if (!success) + return fail_value; + ProcessSP process_sp = m_opaque_sp->GetProcessSP(); + if (!process_sp) + return ret_val; + return process_sp->FixDataAddress(ret_val); + } + + return fail_value; +} + bool SBValue::MightHaveChildren() { LLDB_INSTRUMENT_VA(this); diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index d3fc487aed4333..9409497f1c81ba 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -1869,15 +1869,15 @@ AppleObjCRuntimeV2::DynamicClassInfoExtractor::ComputeHelper( if (loader->IsFullyInitialized()) { switch (exe_ctx.GetTargetRef().GetDynamicClassInfoHelper()) { case eDynamicClassInfoHelperAuto: - LLVM_FALLTHROUGH; + [[fallthrough]]; case eDynamicClassInfoHelperGetRealizedClassList: if (m_runtime.m_has_objc_getRealizedClassList_trylock) return DynamicClassInfoExtractor::objc_getRealizedClassList_trylock; - LLVM_FALLTHROUGH; + [[fallthrough]]; case 
eDynamicClassInfoHelperCopyRealizedClassList: if (m_runtime.m_has_objc_copyRealizedClassList) return DynamicClassInfoExtractor::objc_copyRealizedClassList; - LLVM_FALLTHROUGH; + [[fallthrough]]; case eDynamicClassInfoHelperRealizedClassesStruct: return DynamicClassInfoExtractor::gdb_objc_realized_classes; } diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 3d37bb226a65fd..ae1a77e5be8321 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -2087,7 +2087,7 @@ void GDBRemoteCommunicationServerLLGS::AddProcessThreads( GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qfThreadInfo( StringExtractorGDBRemote &packet) { - assert(m_debugged_processes.size() == 1 || + assert(m_debugged_processes.size() <= 1 || bool(m_extensions_supported & NativeProcessProtocol::Extension::multiprocess)); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 41d81fbcf1b087..12dafd3f5d5d51 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -495,6 +495,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_const_type: case DW_TAG_restrict_type: case DW_TAG_volatile_type: + case DW_TAG_LLVM_ptrauth_type: case DW_TAG_atomic_type: case DW_TAG_unspecified_type: { type_sp = ParseTypeModifier(sc, die, attrs); diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 5da94adb771f86..8fc0f9103f5541 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -4861,7 +4861,7 
@@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, case clang::BuiltinType::Kind::OCLQueue: case clang::BuiltinType::Kind::OCLReserveID: case clang::BuiltinType::Kind::OCLSampler: - case clang::BuiltinType::Kind::OMPArraySection: + case clang::BuiltinType::Kind::ArraySection: case clang::BuiltinType::Kind::OMPArrayShaping: case clang::BuiltinType::Kind::OMPIterator: case clang::BuiltinType::Kind::Overload: @@ -6013,7 +6013,7 @@ uint32_t TypeSystemClang::GetNumPointeeChildren(clang::QualType type) { case clang::BuiltinType::ARCUnbridgedCast: case clang::BuiltinType::PseudoObject: case clang::BuiltinType::BuiltinFn: - case clang::BuiltinType::OMPArraySection: + case clang::BuiltinType::ArraySection: return 1; default: return 0; diff --git a/lldb/test/API/clear-sbvalue-nonaddressable-bits/Makefile b/lldb/test/API/clear-sbvalue-nonaddressable-bits/Makefile new file mode 100644 index 00000000000000..10495940055b63 --- /dev/null +++ b/lldb/test/API/clear-sbvalue-nonaddressable-bits/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/clear-sbvalue-nonaddressable-bits/TestClearSBValueNonAddressableBits.py b/lldb/test/API/clear-sbvalue-nonaddressable-bits/TestClearSBValueNonAddressableBits.py new file mode 100644 index 00000000000000..382b0e7a81d231 --- /dev/null +++ b/lldb/test/API/clear-sbvalue-nonaddressable-bits/TestClearSBValueNonAddressableBits.py @@ -0,0 +1,59 @@ +"""Test that SBValue clears non-addressable bits""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestClearSBValueNonAddressableBits(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + # On AArch64 systems, the top bits that are not used for + # addressing may be used for TBI, MTE, and/or pointer + # authentication. 
+ @skipIf(archs=no_match(["aarch64", "arm64", "arm64e"])) + + # Only run this test on systems where TBI is known to be + # enabled, so the address mask will clear the TBI bits. + @skipUnlessPlatform(["linux"] + lldbplatformutil.getDarwinOSTriples()) + def test(self): + self.source = "main.c" + self.build() + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "break here", lldb.SBFileSpec(self.source, False) + ) + + if self.TraceOn(): + self.runCmd("frame variable") + self.runCmd("frame variable &count &global") + + frame = thread.GetFrameAtIndex(0) + + count_p = frame.FindVariable("count_p") + count_invalid_p = frame.FindVariable("count_invalid_p") + self.assertEqual( + count_p.GetValueAsUnsigned(), count_invalid_p.GetValueAsAddress() + ) + self.assertNotEqual( + count_invalid_p.GetValueAsUnsigned(), count_invalid_p.GetValueAsAddress() + ) + self.assertEqual(5, count_p.Dereference().GetValueAsUnsigned()) + self.assertEqual(5, count_invalid_p.Dereference().GetValueAsUnsigned()) + + global_p = frame.FindVariable("global_p") + global_invalid_p = frame.FindVariable("global_invalid_p") + self.assertEqual( + global_p.GetValueAsUnsigned(), global_invalid_p.GetValueAsAddress() + ) + self.assertNotEqual( + global_invalid_p.GetValueAsUnsigned(), global_invalid_p.GetValueAsAddress() + ) + self.assertEqual(10, global_p.Dereference().GetValueAsUnsigned()) + self.assertEqual(10, global_invalid_p.Dereference().GetValueAsUnsigned()) + + main_p = frame.FindVariable("main_p") + main_invalid_p = frame.FindVariable("main_invalid_p") + self.assertEqual( + main_p.GetValueAsUnsigned(), main_invalid_p.GetValueAsAddress() + ) diff --git a/lldb/test/API/clear-sbvalue-nonaddressable-bits/main.c b/lldb/test/API/clear-sbvalue-nonaddressable-bits/main.c new file mode 100644 index 00000000000000..1b0e42c50dd678 --- /dev/null +++ b/lldb/test/API/clear-sbvalue-nonaddressable-bits/main.c @@ -0,0 +1,27 @@ +#include + +int global = 10; + +int main() { + int count = 5; + int 
*count_p = &count; + + // Add some metadata in the top byte (this will crash unless the + // test is running with TBI enabled, but we won't dereference it) + + intptr_t scratch = (intptr_t)count_p; + scratch |= (3ULL << 60); + int *count_invalid_p = (int *)scratch; + + int (*main_p)() = main; + scratch = (intptr_t)main_p; + scratch |= (3ULL << 60); + int (*main_invalid_p)() = (int (*)())scratch; + + int *global_p = &global; + scratch = (intptr_t)global_p; + scratch |= (3ULL << 60); + int *global_invalid_p = (int *)scratch; + + return count; // break here +} diff --git a/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py index ffa0dc943e0693..8f456aaf890c7f 100644 --- a/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py +++ b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py @@ -4,11 +4,23 @@ import dap_server import lldbdap_testcase +import psutil +from collections import deque from lldbsuite.test import lldbutil from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * +def get_subprocess(process_name): + queue = deque([psutil.Process(os.getpid())]) + while queue: + process = queue.popleft() + if process.name() == process_name: + return process + queue.extend(process.children()) + + self.assertTrue(False, "No subprocess with name %s found" % process_name) + class TestDAP_console(lldbdap_testcase.DAPTestCaseBase): def check_lldb_command( self, lldb_command, contains_string, assert_msg, command_escape_prefix="`" @@ -104,3 +116,49 @@ def test_empty_escape_prefix(self): "Help can be invoked", command_escape_prefix="", ) + + @skipIfWindows + @skipIfRemote + def test_exit_status_message_sigterm(self): + source = "main.cpp" + program = self.getBuildArtifact("a.out") + self.build_and_launch(program, commandEscapePrefix="") + breakpoint1_line = line_number(source, "// breakpoint 1") + breakpoint_ids = self.set_source_breakpoints(source, [breakpoint1_line]) + 
self.continue_to_breakpoints(breakpoint_ids) + + # Kill lldb-server process. + process_name = ( + "debugserver" if platform.system() in ["Darwin"] else "lldb-server" + ) + process = get_subprocess(process_name) + process.terminate() + process.wait() + + # Get the console output + console_output = self.collect_console(1.0) + + # Verify the exit status message is printed. + self.assertIn( + "exited with status = -1 (0xffffffff) debugserver died with signal SIGTERM", + console_output, + "Exit status does not contain message 'exited with status'", + ) + + @skipIfWindows + @skipIfRemote + def test_exit_status_message_ok(self): + source = "main.cpp" + program = self.getBuildArtifact("a.out") + self.build_and_launch(program, commandEscapePrefix="") + self.continue_to_exit() + + # Get the console output + console_output = self.collect_console(1.0) + + # Verify the exit status message is printed. + self.assertIn( + "exited with status = 0 (0x00000000)", + console_output, + "Exit status does not contain message 'exited with status'", + ) diff --git a/lldb/test/API/tools/lldb-dap/stepInTargets/Makefile b/lldb/test/API/tools/lldb-dap/stepInTargets/Makefile new file mode 100644 index 00000000000000..f772575cd5613b --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stepInTargets/Makefile @@ -0,0 +1,6 @@ + +ENABLE_THREADS := YES + +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-dap/stepInTargets/TestDAP_stepInTargets.py b/lldb/test/API/tools/lldb-dap/stepInTargets/TestDAP_stepInTargets.py new file mode 100644 index 00000000000000..6296f6554d07e5 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stepInTargets/TestDAP_stepInTargets.py @@ -0,0 +1,68 @@ +""" +Test lldb-dap stepInTargets request +""" + +import dap_server +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +import lldbdap_testcase +from lldbsuite.test import lldbutil + + +class TestDAP_stepInTargets(lldbdap_testcase.DAPTestCaseBase): + @skipIf( + 
archs=no_match(["x86_64"]) + ) # InstructionControlFlowKind for ARM is not supported yet. + def test_basic(self): + """ + Tests the basic stepping in targets with directly calls. + """ + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.cpp" + + breakpoint_line = line_number(source, "// set breakpoint here") + lines = [breakpoint_line] + # Set breakpoint in the thread function so we can step the threads + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual( + len(breakpoint_ids), len(lines), "expect correct number of breakpoints" + ) + self.continue_to_breakpoints(breakpoint_ids) + + threads = self.dap_server.get_threads() + self.assertEqual(len(threads), 1, "expect one thread") + tid = threads[0]["id"] + + leaf_frame = self.dap_server.get_stackFrame() + self.assertIsNotNone(leaf_frame, "expect a leaf frame") + + # Request all step in targets list and verify the response. + step_in_targets_response = self.dap_server.request_stepInTargets( + leaf_frame["id"] + ) + self.assertEqual(step_in_targets_response["success"], True, "expect success") + self.assertIn( + "body", step_in_targets_response, "expect body field in response body" + ) + self.assertIn( + "targets", + step_in_targets_response["body"], + "expect targets field in response body", + ) + + step_in_targets = step_in_targets_response["body"]["targets"] + self.assertEqual(len(step_in_targets), 3, "expect 3 step in targets") + + # Verify the target names are correct. 
+ self.assertEqual(step_in_targets[0]["label"], "bar()", "expect bar()") + self.assertEqual(step_in_targets[1]["label"], "bar2()", "expect bar2()") + self.assertEqual( + step_in_targets[2]["label"], "foo(int, int)", "expect foo(int, int)" + ) + + # Choose to step into second target and verify that we are in bar2() + self.stepIn(threadId=tid, targetId=step_in_targets[1]["id"], waitForStop=True) + leaf_frame = self.dap_server.get_stackFrame() + self.assertIsNotNone(leaf_frame, "expect a leaf frame") + self.assertEqual(leaf_frame["name"], "bar2()") diff --git a/lldb/test/API/tools/lldb-dap/stepInTargets/main.cpp b/lldb/test/API/tools/lldb-dap/stepInTargets/main.cpp new file mode 100644 index 00000000000000..d3c3dbcc139ef0 --- /dev/null +++ b/lldb/test/API/tools/lldb-dap/stepInTargets/main.cpp @@ -0,0 +1,11 @@ + +int foo(int val, int extra) { return val + extra; } + +int bar() { return 22; } + +int bar2() { return 54; } + +int main(int argc, char const *argv[]) { + foo(bar(), bar2()); // set breakpoint here + return 0; +} diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index 8015dec9ba8fe6..5c70a056fea4bf 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -162,6 +162,8 @@ struct DAP { std::vector exit_commands; std::vector stop_commands; std::vector terminate_commands; + // Map step in target id to list of function targets that user can choose. + llvm::DenseMap step_in_targets; // A copy of the last LaunchRequest or AttachRequest so we can reuse its // arguments if we get a RestartRequest. 
std::optional last_launch_or_attach_request; diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index 25c5ad56e3d6fe..d0fbb9155715b1 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -50,6 +50,7 @@ #include #include +#include "lldb/API/SBStream.h" #include "lldb/Host/Config.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -503,6 +504,10 @@ void EventThreadFunction() { SendContinuedEvent(); break; case lldb::eStateExited: + lldb::SBStream stream; + process.GetStatus(stream); + g_dap.SendOutput(OutputType::Console, stream.GetData()); + // When restarting, we can get an "exited" event for the process we // just killed with the old PID, or even with no PID. In that case // we don't have to terminate the session. @@ -1645,7 +1650,7 @@ void request_initialize(const llvm::json::Object &request) { // The debug adapter supports the gotoTargetsRequest. body.try_emplace("supportsGotoTargetsRequest", false); // The debug adapter supports the stepInTargetsRequest. - body.try_emplace("supportsStepInTargetsRequest", false); + body.try_emplace("supportsStepInTargetsRequest", true); // The debug adapter supports the completions request. body.try_emplace("supportsCompletionsRequest", true); // The debug adapter supports the disassembly request. @@ -3180,14 +3185,155 @@ void request_stepIn(const llvm::json::Object &request) { llvm::json::Object response; FillResponse(request, response); auto arguments = request.getObject("arguments"); + + std::string step_in_target; + uint64_t target_id = GetUnsigned(arguments, "targetId", 0); + auto it = g_dap.step_in_targets.find(target_id); + if (it != g_dap.step_in_targets.end()) + step_in_target = it->second; + + const bool single_thread = GetBoolean(arguments, "singleThread", false); + lldb::RunMode run_mode = + single_thread ? 
lldb::eOnlyThisThread : lldb::eOnlyDuringStepping; lldb::SBThread thread = g_dap.GetLLDBThread(*arguments); if (thread.IsValid()) { // Remember the thread ID that caused the resume so we can set the // "threadCausedFocus" boolean value in the "stopped" events. g_dap.focus_tid = thread.GetThreadID(); - thread.StepInto(); + thread.StepInto(step_in_target.c_str(), run_mode); + } else { + response["success"] = llvm::json::Value(false); + } + g_dap.SendJSON(llvm::json::Value(std::move(response))); +} + +// "StepInTargetsRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "This request retrieves the possible step-in targets for +// the specified stack frame.\nThese targets can be used in the `stepIn` +// request.\nClients should only call this request if the corresponding +// capability `supportsStepInTargetsRequest` is true.", "properties": { +// "command": { +// "type": "string", +// "enum": [ "stepInTargets" ] +// }, +// "arguments": { +// "$ref": "#/definitions/StepInTargetsArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "StepInTargetsArguments": { +// "type": "object", +// "description": "Arguments for `stepInTargets` request.", +// "properties": { +// "frameId": { +// "type": "integer", +// "description": "The stack frame for which to retrieve the possible +// step-in targets." +// } +// }, +// "required": [ "frameId" ] +// }, +// "StepInTargetsResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to `stepInTargets` request.", +// "properties": { +// "body": { +// "type": "object", +// "properties": { +// "targets": { +// "type": "array", +// "items": { +// "$ref": "#/definitions/StepInTarget" +// }, +// "description": "The possible step-in targets of the specified +// source location." 
+// } +// }, +// "required": [ "targets" ] +// } +// }, +// "required": [ "body" ] +// }] +// } +void request_stepInTargets(const llvm::json::Object &request) { + llvm::json::Object response; + FillResponse(request, response); + auto arguments = request.getObject("arguments"); + + g_dap.step_in_targets.clear(); + lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments); + if (frame.IsValid()) { + lldb::SBAddress pc_addr = frame.GetPCAddress(); + lldb::SBAddress line_end_addr = + pc_addr.GetLineEntry().GetSameLineContiguousAddressRangeEnd(true); + lldb::SBInstructionList insts = g_dap.target.ReadInstructions( + pc_addr, line_end_addr, /*flavor_string=*/nullptr); + + if (!insts.IsValid()) { + response["success"] = false; + response["message"] = "Failed to get instructions for frame."; + g_dap.SendJSON(llvm::json::Value(std::move(response))); + return; + } + + llvm::json::Array step_in_targets; + const auto num_insts = insts.GetSize(); + for (size_t i = 0; i < num_insts; ++i) { + lldb::SBInstruction inst = insts.GetInstructionAtIndex(i); + if (!inst.IsValid()) + break; + + lldb::addr_t inst_addr = inst.GetAddress().GetLoadAddress(g_dap.target); + + // Note: currently only x86/x64 supports flow kind. + lldb::InstructionControlFlowKind flow_kind = + inst.GetControlFlowKind(g_dap.target); + if (flow_kind == lldb::eInstructionControlFlowKindCall) { + // Use call site instruction address as id which is easy to debug. + llvm::json::Object step_in_target; + step_in_target["id"] = inst_addr; + + llvm::StringRef call_operand_name = inst.GetOperands(g_dap.target); + lldb::addr_t call_target_addr; + if (call_operand_name.getAsInteger(0, call_target_addr)) + continue; + + lldb::SBAddress call_target_load_addr = + g_dap.target.ResolveLoadAddress(call_target_addr); + if (!call_target_load_addr.IsValid()) + continue; + + // The existing ThreadPlanStepInRange only accept step in target + // function with debug info. 
+ lldb::SBSymbolContext sc = g_dap.target.ResolveSymbolContextForAddress( + call_target_load_addr, lldb::eSymbolContextFunction); + + // The existing ThreadPlanStepInRange only accept step in target + // function with debug info. + std::string step_in_target_name; + if (sc.IsValid() && sc.GetFunction().IsValid()) + step_in_target_name = sc.GetFunction().GetDisplayName(); + + // Skip call sites if we fail to resolve its symbol name. + if (step_in_target_name.empty()) + continue; + + g_dap.step_in_targets.try_emplace(inst_addr, step_in_target_name); + step_in_target.try_emplace("label", step_in_target_name); + step_in_targets.emplace_back(std::move(step_in_target)); + } + } + llvm::json::Object body; + body.try_emplace("targets", std::move(step_in_targets)); + response.try_emplace("body", std::move(body)); } else { response["success"] = llvm::json::Value(false); + response["message"] = "Failed to get frame for input frameId."; } g_dap.SendJSON(llvm::json::Value(std::move(response))); } @@ -3904,6 +4050,7 @@ void RegisterRequestCallbacks() { g_dap.RegisterRequestCallback("source", request_source); g_dap.RegisterRequestCallback("stackTrace", request_stackTrace); g_dap.RegisterRequestCallback("stepIn", request_stepIn); + g_dap.RegisterRequestCallback("stepInTargets", request_stepInTargets); g_dap.RegisterRequestCallback("stepOut", request_stepOut); g_dap.RegisterRequestCallback("threads", request_threads); g_dap.RegisterRequestCallback("variables", request_variables); diff --git a/lldb/unittests/Host/linux/HostTest.cpp b/lldb/unittests/Host/linux/HostTest.cpp index 733909902474d7..8ecaf3ec0decb0 100644 --- a/lldb/unittests/Host/linux/HostTest.cpp +++ b/lldb/unittests/Host/linux/HostTest.cpp @@ -69,17 +69,21 @@ TEST_F(HostTest, GetProcessInfo) { EXPECT_EQ(HostInfo::GetArchitecture(HostInfo::eArchKindDefault), Info.GetArchitecture()); // Test timings - /* - * This is flaky in the buildbots on all archs + // In some sense this is a pretty trivial test. 
What it is trying to + // accomplish is just to validate that these values are never decreasing + // which would be unambiguously wrong. We can not reliably show them + // to be always increasing because the microsecond granularity means that, + // with hardware variations the number of loop iterations need to always + // be increasing for faster and faster machines. ASSERT_TRUE(Host::GetProcessInfo(getpid(), Info)); ProcessInstanceInfo::timespec user_time = Info.GetUserTime(); static volatile unsigned u = 0; for (unsigned i = 0; i < 10'000'000; i++) { - u = i; + u += i; } + ASSERT_TRUE(u > 0); ASSERT_TRUE(Host::GetProcessInfo(getpid(), Info)); ProcessInstanceInfo::timespec next_user_time = Info.GetUserTime(); - ASSERT_TRUE(user_time.tv_sec < next_user_time.tv_sec || - user_time.tv_usec < next_user_time.tv_usec); - */ + ASSERT_TRUE(user_time.tv_sec <= next_user_time.tv_sec || + user_time.tv_usec <= next_user_time.tv_usec); } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ff7e8f7be58c6c..a4f3fd099167a2 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14143,6 +14143,41 @@ Semantics: """""""""" See description of '``llvm.instrprof.increment``' intrinsic. +'``llvm.instrprof.callsite``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.instrprof.callsite(ptr , i64 , + i32 , + i32 , ptr ) + +Overview: +""""""""" + +.. FIXME: detail when it's emitted once the support is added + +The '``llvm.instrprof.callsite``' intrinsic should be emitted before a callsite +that's not to a "fake" callee (like another intrinsic or asm). + +Arguments: +"""""""""" +The first 4 arguments are similar to ``llvm.instrprof.increment``. The indexing +is specific to callsites, meaning callsites are indexed from 0, independent from +the indexes used by the other intrinsics (such as +``llvm.instrprof.increment[.step]``). + +The last argument is the called value of the callsite this intrinsic precedes. 
+ +Semantics: +"""""""""" +.. FIXME: detail how when the lowering pass is added. + +This is lowered by contextual profiling. + '``llvm.instrprof.timestamp``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 6ef6ec20da671d..1be8db602a15c4 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -112,6 +112,11 @@ Changes to the RISC-V Backend * The experimental Ssqosid extension is supported. * Zacas is no longer experimental. * Added the CSR names from the Resumable Non-Maskable Interrupts (Smrnmi) extension. +* The default atomics mapping was changed to emit an additional trailing fence + for sequentially consistent stores, offering compatibility with a future + mapping using load-acquire and store-release instructions while remaining + fully compatible with objects produced prior to this change. The mapping + (ABI) used is recorded as an ELF attribute. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 70d6b09a0c895c..e7c71041454557 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -999,6 +999,13 @@ END_TWO_BYTE_PACK() /// If Flags is not in a defined state then this has no effect. 
void intersectFlagsWith(const SDNodeFlags Flags); + bool hasPoisonGeneratingFlags() const { + SDNodeFlags Flags = getFlags(); + return Flags.hasNoUnsignedWrap() || Flags.hasNoSignedWrap() || + Flags.hasExact() || Flags.hasDisjoint() || Flags.hasNonNeg() || + Flags.hasNoNaNs() || Flags.hasNoInfs(); + } + void setCFIType(uint32_t Type) { CFIType = Type; } uint32_t getCFIType() const { return CFIType; } diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 60f41b30e91c24..cb514cde95b51a 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -46,6 +46,7 @@ typedef unsigned ID; class AssemblyAnnotationWriter; class Constant; +class ConstantRange; struct DenormalMode; class DISubprogram; enum LibFunc : unsigned; @@ -462,6 +463,9 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, /// attributes for the given arg. void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes); + /// adds the range attribute to the list of attributes for the return value. 
+ void addRangeRetAttr(const ConstantRange &CR); + MaybeAlign getParamAlign(unsigned ArgNo) const { return AttributeSets.getParamAlignment(ArgNo); } diff --git a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h index a3ebde709ae6e1..7525c9eb758bef 100644 --- a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h +++ b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h @@ -76,7 +76,7 @@ void GenericConvergenceVerifier::visit(const InstructionT &I) { "Entry intrinsic cannot be preceded by a convergent operation in the " "same basic block.", {Context.print(&I)}); - LLVM_FALLTHROUGH; + [[fallthrough]]; case CONV_ANCHOR: Check(!TokenDef, "Entry or anchor intrinsic cannot have a convergencectrl token " diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 01f24f70b15756..c3f97026c84dc2 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1497,6 +1497,7 @@ class InstrProfInstBase : public IntrinsicInst { case Intrinsic::instrprof_cover: case Intrinsic::instrprof_increment: case Intrinsic::instrprof_increment_step: + case Intrinsic::instrprof_callsite: case Intrinsic::instrprof_timestamp: case Intrinsic::instrprof_value_profile: return true; @@ -1581,6 +1582,21 @@ class InstrProfIncrementInstStep : public InstrProfIncrementInst { } }; +/// This represents the llvm.instrprof.callsite intrinsic. +/// It is structurally like the increment or step counters, hence the +/// inheritance relationship, albeit somewhat tenuous (it's not 'counting' per +/// se) +class InstrProfCallsite : public InstrProfCntrInstBase { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::instrprof_callsite; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + Value *getCallee() const; +}; + /// This represents the llvm.instrprof.timestamp intrinsic. 
class InstrProfTimestampInst : public InstrProfCntrInstBase { public: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 9d7ab0fd2050bc..16f3f9d9fcc67e 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -914,6 +914,11 @@ def int_instrprof_increment_step : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty]>; +// Callsite instrumentation for contextual profiling +def int_instrprof_callsite : Intrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty]>; + // A timestamp for instrumentation based profiling. def int_instrprof_timestamp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty]>; diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h index 1309b17bff9c14..1ff029d44d376c 100644 --- a/llvm/include/llvm/MC/MCELFStreamer.h +++ b/llvm/include/llvm/MC/MCELFStreamer.h @@ -156,7 +156,7 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IsThumb, bool IsAndroid); + bool IsThumb, bool IsAndroid); } // end namespace llvm diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h index 47051447404d00..5038b87cd1dc90 100644 --- a/llvm/include/llvm/MC/TargetRegistry.h +++ b/llvm/include/llvm/MC/TargetRegistry.h @@ -92,39 +92,33 @@ createAsmStreamer(MCContext &Ctx, std::unique_ptr OS, MCStreamer *createELFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createGOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createMachOStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool DWARFMustBeAtTheEnd, + bool 
DWARFMustBeAtTheEnd, bool LabelSections = false); MCStreamer *createWasmStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createXCOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createSPIRVStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCStreamer *createDXContainerStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll); + std::unique_ptr &&CE); MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx); @@ -199,42 +193,42 @@ class Target { MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using GOFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using MachOStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool DWARFMustBeAtTheEnd); using COFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool IncrementalLinkerCompatible); using WasmStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using XCOFFStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using SPIRVStreamerCtorTy = 
MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); - + std::unique_ptr &&Emitter); + using DXContainerStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll); + std::unique_ptr &&Emitter); using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S); using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)( @@ -566,7 +560,7 @@ class Target { std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, + const MCSubtargetInfo &STI, bool, bool IncrementalLinkerCompatible, bool DWARFMustBeAtTheEnd) const { MCStreamer *S = nullptr; @@ -577,66 +571,63 @@ class Target { assert((T.isOSWindows() || T.isUEFI()) && "only Windows and UEFI COFF are supported"); S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, - IncrementalLinkerCompatible); + std::move(Emitter), IncrementalLinkerCompatible); break; case Triple::MachO: if (MachOStreamerCtorFn) S = MachOStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, - DWARFMustBeAtTheEnd); + std::move(Emitter), DWARFMustBeAtTheEnd); else S = createMachOStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, - DWARFMustBeAtTheEnd); + std::move(Emitter), DWARFMustBeAtTheEnd); break; case Triple::ELF: if (ELFStreamerCtorFn) S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createELFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::Wasm: if (WasmStreamerCtorFn) S = WasmStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), 
RelaxAll); + std::move(Emitter)); break; case Triple::GOFF: if (GOFFStreamerCtorFn) S = GOFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createGOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::XCOFF: if (XCOFFStreamerCtorFn) S = XCOFFStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::SPIRV: if (SPIRVStreamerCtorFn) S = SPIRVStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createSPIRVStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; case Triple::DXContainer: if (DXContainerStreamerCtorFn) S = DXContainerStreamerCtorFn(T, Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createDXContainerStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); break; } if (ObjectTargetStreamerCtorFn) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 4494d9b96189bc..1d457be93741f2 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -801,8 +801,9 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { } else if (EF.getHeader().e_machine == ELF::EM_RISCV) { if (Expected NameOrErr = getSymbolName(Sym)) { StringRef Name = *NameOrErr; - // Mark fake labels (used for label differences) and mapping symbols. - if (Name == ".L0 " || Name.starts_with("$d") || Name.starts_with("$x")) + // Mark empty name symbols (used for label differences) and mapping + // symbols. 
+ if (Name.empty() || Name.starts_with("$d") || Name.starts_with("$x")) Result |= SymbolRef::SF_FormatSpecific; } else { // TODO: Actually report errors helpfully. diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index a8e98f8bb13861..d378c3696f8d0b 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -44,6 +44,12 @@ enum class Meta : uint64_t { using MemProfSchema = llvm::SmallVector(Meta::Size)>; +// Returns the full schema currently in use. +MemProfSchema getFullSchema(); + +// Returns the schema consisting of the fields used for hot cold memory hinting. +MemProfSchema getHotColdSchema(); + // Holds the actual MemInfoBlock data with all fields. Contents may be read or // written partially by providing an appropriate schema to the serialize and // deserialize methods. @@ -116,22 +122,6 @@ struct PortableMemInfoBlock { void clear() { *this = PortableMemInfoBlock(); } - // Returns the full schema currently in use. - static MemProfSchema getFullSchema() { - MemProfSchema List; -#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - return List; - } - - // Returns the schema consisting of the fields currently consumed by the - // compiler. 
- static MemProfSchema getHotColdSchema() { - return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime, - Meta::TotalLifetimeAccessDensity}; - } - bool operator==(const PortableMemInfoBlock &Other) const { #define MIBEntryDef(NameTag, Name, Type) \ if (Other.get##Name() != get##Name()) \ diff --git a/llvm/include/llvm/Support/RISCVAttributeParser.h b/llvm/include/llvm/Support/RISCVAttributeParser.h index 305adffbe851e7..9f295504de959e 100644 --- a/llvm/include/llvm/Support/RISCVAttributeParser.h +++ b/llvm/include/llvm/Support/RISCVAttributeParser.h @@ -24,6 +24,7 @@ class RISCVAttributeParser : public ELFAttributeParser { Error unalignedAccess(unsigned tag); Error stackAlign(unsigned tag); + Error atomicAbi(unsigned tag); public: RISCVAttributeParser(ScopedPrinter *sw) diff --git a/llvm/include/llvm/Support/RISCVAttributes.h b/llvm/include/llvm/Support/RISCVAttributes.h index 18f5a84d21f250..5def890a727355 100644 --- a/llvm/include/llvm/Support/RISCVAttributes.h +++ b/llvm/include/llvm/Support/RISCVAttributes.h @@ -32,8 +32,21 @@ enum AttrType : unsigned { PRIV_SPEC = 8, PRIV_SPEC_MINOR = 10, PRIV_SPEC_REVISION = 12, + ATOMIC_ABI = 14, }; +namespace RISCVAtomicAbiTag { +enum AtomicABI : unsigned { + // Values for Tag_RISCV_atomic_abi + // Defined at + // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#tag_riscv_atomic_abi-14-uleb128version + UNKNOWN = 0, + A6C = 1, + A6S = 2, + A7 = 3, +}; +} // namespace RISCVAtomicAbiTag + enum { NOT_ALLOWED = 0, ALLOWED = 1 }; } // namespace RISCVAttrs diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index ea3520835fa07d..1684b424e3b442 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1979,6 +1979,7 @@ class Pattern resultInstrs> { list ResultInstrs = resultInstrs; list Predicates = []; // See class Instruction in Target.td. 
int AddedComplexity = 0; // See class Instruction in Target.td. + bit GISelShouldIgnore = 0; } // Pat - A simple (but common) form of a pattern, which produces a simple result diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index b1ba8e7c0f6014..f65515ca387229 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef UnderlyingObjects, namespace { struct DepDistanceStrideAndSizeInfo { const SCEV *Dist; - uint64_t Stride; + uint64_t StrideA; + uint64_t StrideB; uint64_t TypeByteSize; bool AIsWrite; bool BIsWrite; - DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride, - uint64_t TypeByteSize, bool AIsWrite, - bool BIsWrite) - : Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize), - AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} + DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA, + uint64_t StrideB, uint64_t TypeByteSize, + bool AIsWrite, bool BIsWrite) + : Dist(Dist), StrideA(StrideA), StrideB(StrideB), + TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; } // namespace -// Get the dependence distance, stride, type size and whether it is a write for +// Get the dependence distance, strides, type size and whether it is a write for // the dependence between A and B. Returns a DepType, if we can prove there's // no dependence or the analysis fails. Outlined to lambda to limit he scope // of various temporary variables, like A/BPtr, StrideA/BPtr and others. @@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize( InnermostLoop)) return MemoryDepChecker::Dependence::IndirectUnsafe; - // Need accesses with constant stride. We don't want to vectorize - // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap - // in the address space. 
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { + // Need accesses with constant strides and the same direction. We don't want + // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that + // could wrap in the address space. + if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || + (StrideAPtr < 0 && StrideBPtr > 0)) { LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return MemoryDepChecker::Dependence::Unknown; } @@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - uint64_t Stride = std::abs(StrideAPtr); - return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite, - BIsWrite); + return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), + std::abs(StrideBPtr), TypeByteSize, + AIsWrite, BIsWrite); } MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( @@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( if (std::holds_alternative(Res)) return std::get(Res); - const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] = + const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; + std::optional CommonStride = + StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt; + if (isa(Dist)) { + // TODO: Relax requirement that there is a common stride to retry with + // non-constant distance dependencies. + FoundNonConstantDistanceDependence |= !!CommonStride; + LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); + return Dependence::Unknown; + } + ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); + // If the distance between the acecsses is larger than their absolute stride // multiplied by the backedge taken count, the accesses are independet, i.e. 
// they are far enough appart that accesses won't access the same location // across all loop ierations. - if (!isa(Dist) && HasSameSize && + if (HasSameSize && CommonStride && isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist, - Stride, TypeByteSize)) + *CommonStride, TypeByteSize)) return Dependence::NoDep; const SCEVConstant *C = dyn_cast(Dist); - if (!C) { - LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); - FoundNonConstantDistanceDependence = true; - return Dependence::Unknown; - } - const APInt &Val = C->getAPInt(); - int64_t Distance = Val.getSExtValue(); - - // If the distance between accesses and their strides are known constants, - // check whether the accesses interlace each other. - if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize && - areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { - LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); - return Dependence::NoDep; + // Attempt to prove strided accesses independent. + if (C) { + const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + + // If the distance between accesses and their strides are known constants, + // check whether the accesses interlace each other. + if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 && + HasSameSize && + areStridedAccessesIndependent(std::abs(Distance), *CommonStride, + TypeByteSize)) { + LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); + return Dependence::NoDep; + } } // Negative distances are not plausible dependencies. - if (Val.isNegative()) { + if (SE.isKnownNonPositive(Dist)) { + if (SE.isKnownNonNegative(Dist)) { + if (HasSameSize) { + // Write to the same location with the same size. 
+ return Dependence::Forward; + } else { + LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " + "different type sizes\n"); + return Dependence::Unknown; + } + } + bool IsTrueDataDependence = (AIsWrite && !BIsWrite); // Check if the first access writes to a location that is read in a later // iteration, where the distance between them is not a multiple of a vector @@ -2071,27 +2095,40 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a // forward dependency will allow vectorization using any width. - if (IsTrueDataDependence && EnableForwardingConflictDetection && - (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(), - TypeByteSize))) { - LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); - return Dependence::ForwardButPreventsForwarding; + + if (IsTrueDataDependence && EnableForwardingConflictDetection) { + if (!C) { + // TODO: FoundNonConstantDistanceDependence is used as a necessary + // condition to consider retrying with runtime checks. Historically, we + // did not set it when strides were different but there is no inherent + // reason to. + FoundNonConstantDistanceDependence |= CommonStride.has_value(); + return Dependence::Unknown; + } + if (!HasSameSize || + couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(), + TypeByteSize)) { + LLVM_DEBUG( + dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); + return Dependence::ForwardButPreventsForwarding; + } } LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n"); return Dependence::Forward; } - // Write to the same location with the same size. 
- if (Val == 0) { - if (HasSameSize) - return Dependence::Forward; - LLVM_DEBUG( - dbgs() << "LAA: Zero dependence difference but different type sizes\n"); + if (!C) { + // TODO: FoundNonConstantDistanceDependence is used as a necessary condition + // to consider retrying with runtime checks. Historically, we did not set it + // when strides were different but there is no inherent reason to. + FoundNonConstantDistanceDependence |= CommonStride.has_value(); + LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); return Dependence::Unknown; } - assert(Val.isStrictlyPositive() && "Expect a positive value"); + if (!SE.isKnownPositive(Dist)) + return Dependence::Unknown; if (!HasSameSize) { LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " @@ -2099,6 +2136,14 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( return Dependence::Unknown; } + // The logic below currently only supports StrideA == StrideB, i.e. there's a + // common stride. + if (!CommonStride) + return Dependence::Unknown; + + const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); @@ -2134,7 +2179,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. 
uint64_t MinDistanceNeeded = - TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast(Distance)) { LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'); @@ -2183,7 +2228,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits // since there is a backwards dependency. - uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride); + uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride)); LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 22a766f8d62524..8eaf78157550ee 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1431,10 +1431,8 @@ static bool SinkCast(CastInst *CI) { if (!InsertedCast) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); assert(InsertPt != UserBB->end()); - InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), - CI->getType(), ""); + InsertedCast = cast(CI->clone()); InsertedCast->insertBefore(*UserBB, InsertPt); - InsertedCast->setDebugLoc(CI->getDebugLoc()); } // Replace a use of the cast with a use of the new cast. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index ad23d9d1835bb1..4c8be4d400e40c 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2223,7 +2223,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, // addresses. 
We can treat it like a normal dbg_value intrinsic here; to // benefit from the full analysis of stack/SSA locations, GlobalISel would // need to register for and use the AssignmentTrackingAnalysis pass. - LLVM_FALLTHROUGH; + [[fallthrough]]; case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. const DbgValueInst &DI = cast(CI); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fd265b12d73ca4..aa746f1c7b7b3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7620,6 +7620,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) { static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { EVT VT = N0.getValueType(); + SDLoc DL(N); auto peekThroughResize = [](SDValue V) { if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE) @@ -7642,16 +7643,16 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, if (SDValue NotOperand = getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false)) { if (peekThroughResize(NotOperand) == N1Resized) - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1); + return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT), + N1); } // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) if (SDValue NotOperand = getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false)) { if (peekThroughResize(NotOperand) == N1Resized) - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1); + return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT), + N1); } } @@ -7659,13 +7660,13 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, // fold or (xor X, N1), N1 --> or X, N1 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1)))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, X, N1); + return DAG.getNode(ISD::OR, DL, VT, 
X, N1); // fold or (xor x, y), (x and/or y) --> or x, y if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) && (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) || sd_match(N1, m_Or(m_Specific(X), m_Specific(Y))))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Y); + return DAG.getNode(ISD::OR, DL, VT, X, Y); if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 659966b7ba55f5..761e72e6dce702 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1424,7 +1424,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // happened (such as an optimised function being always-inlined into an // optnone function). We will not be using the extra information in the // dbg.assign in that case, just use its dbg.value fields. - LLVM_FALLTHROUGH; + [[fallthrough]]; case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
const DbgValueInst *DI = cast(II); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 24f69ea1b742a6..8413cd60135f7f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3195,7 +3195,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::BITCAST: if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a4d293687a8739..7c5a280bdd1b9b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5128,6 +5128,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, if (VT.isScalableVector()) return true; + if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) + return true; + unsigned Opcode = Op.getOpcode(); switch (Opcode) { case ISD::FREEZE: @@ -5167,34 +5170,20 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return true; const TargetOptions &Options = getTarget().Options; - return Options.NoNaNsFPMath || Options.NoInfsFPMath || - (ConsiderFlags && - (Op->getFlags().hasNoNaNs() || Op->getFlags().hasNoInfs())); + return Options.NoNaNsFPMath || Options.NoInfsFPMath; } - // Matches hasPoisonGeneratingFlags(). + case ISD::OR: case ISD::ZERO_EXTEND: - return ConsiderFlags && Op->getFlags().hasNonNeg(); - case ISD::ADD: case ISD::SUB: case ISD::MUL: - // Matches hasPoisonGeneratingFlags(). - return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || - Op->getFlags().hasNoUnsignedWrap()); + // No poison except from flags (which is handled above) + return false; case ISD::SHL: // If the max shift amount isn't in range, then the shift can create poison. 
- if (!getValidMaximumShiftAmountConstant(Op, DemandedElts)) - return true; - - // Matches hasPoisonGeneratingFlags(). - return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || - Op->getFlags().hasNoUnsignedWrap()); - - // Matches hasPoisonGeneratingFlags(). - case ISD::OR: - return ConsiderFlags && Op->getFlags().hasDisjoint(); + return !getValidMaximumShiftAmountConstant(Op, DemandedElts); case ISD::SCALAR_TO_VECTOR: // Check if we demand any upper (undef) elements. diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 2c87b344083edb..bba3329e8cc269 100644 --- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -16,6 +16,10 @@ #if defined(LLVM_ON_UNIX) && !defined(__ANDROID__) #include #include +#if defined(__MVS__) +#include "llvm/Support/BLAKE3.h" +#include +#endif #include #elif defined(_WIN32) #include @@ -239,6 +243,24 @@ void SharedMemoryMapper::reserve(size_t NumBytes, #if defined(LLVM_ON_UNIX) +#if defined(__MVS__) + ArrayRef Data( + reinterpret_cast(SharedMemoryName.c_str()), + SharedMemoryName.size()); + auto HashedName = BLAKE3::hash(Data); + key_t Key = *reinterpret_cast(HashedName.data()); + int SharedMemoryId = + shmget(Key, NumBytes, IPC_CREAT | __IPC_SHAREAS | 0700); + if (SharedMemoryId < 0) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } + LocalAddr = shmat(SharedMemoryId, nullptr, 0); + if (LocalAddr == reinterpret_cast(-1)) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } +#else int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR, 0700); if (SharedMemoryFile < 0) { return OnReserved(errorCodeToError(errnoAsErrorCode())); @@ -254,6 +276,7 @@ void SharedMemoryMapper::reserve(size_t NumBytes, } close(SharedMemoryFile); +#endif #elif defined(_WIN32) @@ -373,8 +396,13 @@ void SharedMemoryMapper::release(ArrayRef Bases, #if 
defined(LLVM_ON_UNIX) +#if defined(__MVS__) + if (shmdt(Reservations[Base].LocalAddr) < 0) + Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#else if (munmap(Reservations[Base].LocalAddr, Reservations[Base].Size) != 0) Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#endif #elif defined(_WIN32) @@ -415,7 +443,11 @@ SharedMemoryMapper::~SharedMemoryMapper() { #if defined(LLVM_ON_UNIX) && !defined(__ANDROID__) +#if defined(__MVS__) + shmdt(R.second.LocalAddr); +#else munmap(R.second.LocalAddr, R.second.Size); +#endif #elif defined(_WIN32) diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 6614beec760fb3..f5118c0f2bfa43 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -18,6 +18,10 @@ #include #include #include +#if defined(__MVS__) +#include "llvm/Support/BLAKE3.h" +#include +#endif #include #endif @@ -59,6 +63,21 @@ ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { SharedMemoryName = SharedMemoryNameStream.str(); } +#if defined(__MVS__) + ArrayRef Data( + reinterpret_cast(SharedMemoryName.c_str()), + SharedMemoryName.size()); + auto HashedName = BLAKE3::hash(Data); + key_t Key = *reinterpret_cast(HashedName.data()); + int SharedMemoryId = + shmget(Key, Size, IPC_CREAT | IPC_EXCL | __IPC_SHAREAS | 0700); + if (SharedMemoryId < 0) + return errorCodeToError(errnoAsErrorCode()); + + void *Addr = shmat(SharedMemoryId, nullptr, 0); + if (Addr == reinterpret_cast(-1)) + return errorCodeToError(errnoAsErrorCode()); +#else int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0700); if (SharedMemoryFile < 0) @@ -73,6 +92,7 @@ ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { return 
errorCodeToError(errnoAsErrorCode()); close(SharedMemoryFile); +#endif #elif defined(_WIN32) @@ -131,6 +151,9 @@ Expected ExecutorSharedMemoryMapperService::initialize( #if defined(LLVM_ON_UNIX) +#if defined(__MVS__) + // TODO Is it possible to change the protection level? +#else int NativeProt = 0; if ((Segment.RAG.Prot & MemProt::Read) == MemProt::Read) NativeProt |= PROT_READ; @@ -141,6 +164,7 @@ Expected ExecutorSharedMemoryMapperService::initialize( if (mprotect(Segment.Addr.toPtr(), Segment.Size, NativeProt)) return errorCodeToError(errnoAsErrorCode()); +#endif #elif defined(_WIN32) @@ -239,8 +263,15 @@ Error ExecutorSharedMemoryMapperService::release( #if defined(LLVM_ON_UNIX) +#if defined(__MVS__) + (void)Size; + + if (shmdt(Base.toPtr()) < 0) + Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#else if (munmap(Base.toPtr(), Size) != 0) Err = joinErrors(std::move(Err), errorCodeToError(errnoAsErrorCode())); +#endif #elif defined(_WIN32) (void)Size; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index e66fe73425e863..545940dd86f90f 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -735,6 +735,10 @@ void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo, ArgNo, Bytes); } +void Function::addRangeRetAttr(const ConstantRange &CR) { + AttributeSets = AttributeSets.addRangeRetAttr(getContext(), CR); +} + DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const { if (&FPType == &APFloat::IEEEsingle()) { DenormalMode Mode = getDenormalModeF32Raw(); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 89403e1d7fcb4d..8faeb4e9951f74 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -291,6 +291,12 @@ Value *InstrProfIncrementInst::getStep() const { return ConstantInt::get(Type::getInt64Ty(Context), 1); } +Value *InstrProfCallsite::getCallee() const { + if (isa(this)) + return getArgOperand(4); + return nullptr; +} + 
std::optional ConstrainedFPIntrinsic::getRoundingMode() const { unsigned NumOperands = arg_size(); Metadata *MD = nullptr; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index bcca01ba7f3fdc..f28c157e035c7d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6308,7 +6308,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { break; } case Intrinsic::experimental_convergence_entry: - LLVM_FALLTHROUGH; case Intrinsic::experimental_convergence_anchor: break; case Intrinsic::experimental_convergence_loop: diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index b8ef2654ed6e3b..005521bad6e014 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -725,13 +725,7 @@ void ELFWriter::computeSymbolTable( HasLargeSectionIndex = true; } - // Temporary symbols generated for certain assembler features (.eh_frame, - // .debug_line) of an empty name may be referenced by relocations due to - // linker relaxation. Rename them to ".L0 " to match the gas fake label name - // and allow ld/objcopy --discard-locals to discard such symbols. 
StringRef Name = Symbol.getName(); - if (Name.empty()) - Name = ".L0 "; // Sections have their own string table if (Symbol.getType() != ELF::STT_SECTION) { diff --git a/llvm/lib/MC/MCDXContainerStreamer.cpp b/llvm/lib/MC/MCDXContainerStreamer.cpp index 3cb452f3dfa554..a596c9a16d498d 100644 --- a/llvm/lib/MC/MCDXContainerStreamer.cpp +++ b/llvm/lib/MC/MCDXContainerStreamer.cpp @@ -21,11 +21,8 @@ void MCDXContainerStreamer::emitInstToData(const MCInst &, MCStreamer *llvm::createDXContainerStreamer( MCContext &Context, std::unique_ptr &&MAB, - std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&OW, std::unique_ptr &&CE) { auto *S = new MCDXContainerStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index e541090769e9e5..23e926c3a9d14b 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -892,11 +892,8 @@ void MCELFStreamer::createAttributesSection( MCStreamer *llvm::createELFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCELFStreamer *S = new MCELFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp index 58d13c9f378853..2b6d5c8e75a706 100644 --- a/llvm/lib/MC/MCGOFFStreamer.cpp +++ b/llvm/lib/MC/MCGOFFStreamer.cpp @@ -24,11 +24,8 @@ MCGOFFStreamer::~MCGOFFStreamer() {} MCStreamer *llvm::createGOFFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCGOFFStreamer *S = new MCGOFFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git 
a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index d7d343f15eaa61..10f9988b9d16a0 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -564,7 +564,7 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool DWARFMustBeAtTheEnd, + bool DWARFMustBeAtTheEnd, bool LabelSections) { MCMachOStreamer *S = new MCMachOStreamer(Context, std::move(MAB), std::move(OW), std::move(CE), @@ -574,8 +574,6 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, Target, Context.getObjectFileInfo()->getSDKVersion(), Context.getObjectFileInfo()->getDarwinTargetVariantTriple(), Context.getObjectFileInfo()->getDarwinTargetVariantSDKVersion()); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 490e0a4dd40417..d2da5d0d3f90f2 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -34,6 +34,8 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, EmitEHFrame(true), EmitDebugFrame(false) { if (Assembler->getBackendPtr()) setAllowAutoPadding(Assembler->getBackend().allowAutoPadding()); + if (Context.getTargetOptions() && Context.getTargetOptions()->MCRelaxAll) + Assembler->setRelaxAll(true); } MCObjectStreamer::~MCObjectStreamer() = default; diff --git a/llvm/lib/MC/MCSPIRVStreamer.cpp b/llvm/lib/MC/MCSPIRVStreamer.cpp index 0bb73c7ff7ee2d..3b75a2e17a4a98 100644 --- a/llvm/lib/MC/MCSPIRVStreamer.cpp +++ b/llvm/lib/MC/MCSPIRVStreamer.cpp @@ -34,11 +34,8 @@ void MCSPIRVStreamer::emitInstToData(const MCInst &Inst, MCStreamer *llvm::createSPIRVStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCSPIRVStreamer *S = new MCSPIRVStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - 
S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp index fbab72fb5f3d36..c553ede77555a5 100644 --- a/llvm/lib/MC/MCWasmStreamer.cpp +++ b/llvm/lib/MC/MCWasmStreamer.cpp @@ -275,11 +275,8 @@ void MCWasmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, MCStreamer *llvm::createWasmStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCWasmStreamer *S = new MCWasmStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 458b4be6198387..175d7d6b6c31a6 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -162,12 +162,9 @@ void MCXCOFFStreamer::emitInstToData(const MCInst &Inst, MCStreamer *llvm::createXCOFFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&CE, - bool RelaxAll) { + std::unique_ptr &&CE) { MCXCOFFStreamer *S = new MCXCOFFStreamer(Context, std::move(MAB), std::move(OW), std::move(CE)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 70ae57f77daef7..e1846fcbffee52 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -508,7 +508,7 @@ static Error writeMemProfV0( OS.write(0ULL); // Reserve space for the memprof frame payload offset. OS.write(0ULL); // Reserve space for the memprof frame table offset. - auto Schema = memprof::PortableMemInfoBlock::getFullSchema(); + auto Schema = memprof::getFullSchema(); writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = @@ -534,7 +534,7 @@ static Error writeMemProfV1( OS.write(0ULL); // Reserve space for the memprof frame payload offset. 
OS.write(0ULL); // Reserve space for the memprof frame table offset. - auto Schema = memprof::PortableMemInfoBlock::getFullSchema(); + auto Schema = memprof::getFullSchema(); writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = @@ -565,9 +565,9 @@ static Error writeMemProfV2( OS.write(0ULL); // Reserve space for the memprof call stack payload offset. OS.write(0ULL); // Reserve space for the memprof call stack table offset. - auto Schema = memprof::PortableMemInfoBlock::getHotColdSchema(); + auto Schema = memprof::getHotColdSchema(); if (MemProfFullSchema) - Schema = memprof::PortableMemInfoBlock::getFullSchema(); + Schema = memprof::getFullSchema(); writeMemProfSchema(OS, Schema); uint64_t RecordTableOffset = diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 9a46d1151311f4..4667778ca11dd0 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -10,6 +10,19 @@ namespace llvm { namespace memprof { +MemProfSchema getFullSchema() { + MemProfSchema List; +#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name); +#include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + return List; +} + +MemProfSchema getHotColdSchema() { + return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime, + Meta::TotalLifetimeAccessDensity}; +} + static size_t serializedSizeV0(const IndexedAllocationInfo &IAI, const MemProfSchema &Schema) { size_t Size = 0; diff --git a/llvm/lib/Support/RISCVAttributeParser.cpp b/llvm/lib/Support/RISCVAttributeParser.cpp index 7ce4b6ab161cd3..19c5a0e06903f6 100644 --- a/llvm/lib/Support/RISCVAttributeParser.cpp +++ b/llvm/lib/Support/RISCVAttributeParser.cpp @@ -36,7 +36,18 @@ const RISCVAttributeParser::DisplayHandler { RISCVAttrs::UNALIGNED_ACCESS, &RISCVAttributeParser::unalignedAccess, - }}; + }, + { + RISCVAttrs::ATOMIC_ABI, + &RISCVAttributeParser::atomicAbi, + }, +}; + +Error RISCVAttributeParser::atomicAbi(unsigned Tag) { + uint64_t Value = 
de.getULEB128(cursor); + printAttribute(Tag, Value, "Atomic ABI is " + utostr(Value)); + return Error::success(); +} Error RISCVAttributeParser::unalignedAccess(unsigned tag) { static const char *strings[] = {"No unaligned access", "Unaligned access"}; diff --git a/llvm/lib/Support/RISCVAttributes.cpp b/llvm/lib/Support/RISCVAttributes.cpp index 9e629760d3d842..dc70d65acba063 100644 --- a/llvm/lib/Support/RISCVAttributes.cpp +++ b/llvm/lib/Support/RISCVAttributes.cpp @@ -18,6 +18,7 @@ static constexpr TagNameItem tagData[] = { {PRIV_SPEC, "Tag_priv_spec"}, {PRIV_SPEC_MINOR, "Tag_priv_spec_minor"}, {PRIV_SPEC_REVISION, "Tag_priv_spec_revision"}, + {ATOMIC_ABI, "Tag_atomic_abi"}, }; constexpr TagNameMap RISCVAttributeTags{tagData}; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 419c141121c325..c86c98eed24f08 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1296,7 +1296,7 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, } case AArch64::LDPQpost: Imm = -Imm; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AArch64::STPQpre: { unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 223898e9d634d8..8e9782c1930c3c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18663,14 +18663,12 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (DCI.isBeforeLegalizeOps()) return SDValue(); - // Optimise concat_vectors of two [us]avgceils or [us]avgfloors with a 128-bit - // destination size, combine into an avg of two contacts of the source - // vectors. 
eg: concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), - // concat(b, d)) + // Optimise concat_vectors of two identical binops with a 128-bit destination + // size, combine into an binop of two contacts of the source vectors. eg: + // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d)) if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() && - (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS || - N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS) && - N0->hasOneUse() && N1->hasOneUse()) { + DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() && + N1->hasOneUse()) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); SDValue N10 = N1->getOperand(0); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index ad21f2673a6412..f5bea3336cbf73 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -306,13 +306,12 @@ llvm::createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, return new AArch64TargetAsmStreamer(S, OS); } -MCELFStreamer *llvm::createAArch64ELFStreamer( - MCContext &Context, std::unique_ptr TAB, - std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +llvm::createAArch64ELFStreamer(MCContext &Context, + std::unique_ptr TAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { AArch64ELFStreamer *S = new AArch64ELFStreamer( Context, std::move(TAB), std::move(OW), std::move(Emitter)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 25c609ee1496b3..e6df79ba19d4cf 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -20,8 +20,7 @@ namespace llvm { 
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); + std::unique_ptr Emitter); } #endif diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 043f0a03b79756..0dd4a78f962d41 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -378,30 +378,28 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createAArch64ELFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCStreamer *createMachOStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, - bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, DWARFMustBeAtTheEnd, + std::move(Emitter), DWARFMustBeAtTheEnd, /*LabelSections*/ true); } static MCStreamer * createWinCOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool IncrementalLinkerCompatible) { return createAArch64WinCOFFStreamer(Ctx, std::move(TAB), std::move(OW), - std::move(Emitter), RelaxAll, + std::move(Emitter), IncrementalLinkerCompatible); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 438ac6cc47885e..c25cc2e99adcab 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -294,7 +294,7 @@ void 
AArch64TargetWinCOFFStreamer::emitARM64WinCFISaveAnyRegQPX(unsigned Reg, MCWinCOFFStreamer *llvm::createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IncrementalLinkerCompatible) { + bool IncrementalLinkerCompatible) { auto *S = new AArch64WinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), std::move(OW)); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h index 8c0656652eed2a..a13b1a451be5fd 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -21,7 +21,7 @@ namespace llvm { MCWinCOFFStreamer *createAArch64WinCOFFStreamer( MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IncrementalLinkerCompatible); + bool IncrementalLinkerCompatible); } // end llvm namespace #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2b81f5d51032dd..8abe9920c02c2d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1493,6 +1493,7 @@ def FeatureISAVersion11_Generic: FeatureSet< [FeatureMSAALoadDstSelBug, FeatureVALUTransUseHazard, FeatureUserSGPRInit16Bug, + FeatureMADIntraFwdBug, FeaturePrivEnabledTrap2NopBug, FeatureRequiresCOV6])>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index aa4ec785bf02a3..56345d14a331ca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2261,7 +2261,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case AMDGPU::G_FCMP: if (!Subtarget.hasSALUFloatInsts()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case AMDGPU::G_ICMP: case 
AMDGPU::G_UADDO: case AMDGPU::G_USUBO: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp index 1ce7012040daa4..4e9a33227a5dcb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp @@ -28,10 +28,11 @@ class AMDGPUELFStreamer : public MCELFStreamer { } -MCELFStreamer *llvm::createAMDGPUELFStreamer( - const Triple &T, MCContext &Context, std::unique_ptr MAB, - std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +llvm::createAMDGPUELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { return new AMDGPUELFStreamer(T, Context, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h index e09e2dca1b47af..f9ece5f22b0f7a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h @@ -26,8 +26,7 @@ class Triple; MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); + std::unique_ptr Emitter); } // namespace llvm. 
#endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 4700a984770bfb..30dd384051b940 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -112,10 +112,9 @@ static MCTargetStreamer *createAMDGPUNullTargetStreamer(MCStreamer &S) { static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createAMDGPUELFStreamer(T, Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } namespace { diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 028db9d17e300a..e54314cc7d00af 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -11104,7 +11104,7 @@ ARMAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, return Match_MnemonicFail; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: return Match_Success; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 1d80af590d16e0..afd7dccbeca9b3 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1487,8 +1487,7 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, std::unique_ptr TAB, std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll, bool IsThumb, - bool IsAndroid) { + bool IsThumb, bool IsAndroid) { ARMELFStreamer *S = new ARMELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter), IsThumb, IsAndroid); @@ -1497,8 +1496,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. 
S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 8d9959a9457dbe..20603b6cf1b0bb 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -359,10 +359,9 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createARMELFStreamer( - Ctx, std::move(MAB), std::move(OW), std::move(Emitter), false, + Ctx, std::move(MAB), std::move(OW), std::move(Emitter), (T.getArch() == Triple::thumb || T.getArch() == Triple::thumbeb), T.isAndroid()); } @@ -370,10 +369,10 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, static MCStreamer * createARMMachOStreamer(MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, std::move(MAB), std::move(OW), - std::move(Emitter), false, DWARFMustBeAtTheEnd); + std::move(Emitter), DWARFMustBeAtTheEnd); } static MCInstPrinter *createARMMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 3066d9ba6783b2..a673d590419ecc 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -94,7 +94,6 @@ MCStreamer *createARMWinCOFFStreamer(MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, std::unique_ptr &&Emitter, - bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an ELF Mach-O object writer. 
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index cdd7f6fb715a70..0fcf6eb1a5abb5 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -66,11 +66,12 @@ void ARMWinCOFFStreamer::finishImpl() { } } -MCStreamer *llvm::createARMWinCOFFStreamer( - MCContext &Context, std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll, - bool IncrementalLinkerCompatible) { +MCStreamer * +llvm::createARMWinCOFFStreamer(MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool IncrementalLinkerCompatible) { auto *S = new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), std::move(OW)); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index ba370261e284c5..119baff83dae46 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -72,10 +72,9 @@ static MCInstPrinter *createAVRMCInstPrinter(const Triple &T, static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return createELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCTargetStreamer * diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 44932383fb43e9..caf84701b999f0 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -50,13 +50,13 @@ static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, return 
createBPFMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } -static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, - std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { - return createELFStreamer(Ctx, std::move(MAB), std::move(OW), std::move(Emitter), - RelaxAll); +static MCStreamer * +createBPFMCStreamer(const Triple &T, MCContext &Ctx, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { + return createELFStreamer(Ctx, std::move(MAB), std::move(OW), + std::move(Emitter)); } static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp index 64f01cd1c9fa7c..c3403ade389c40 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp @@ -88,13 +88,10 @@ createCSKYObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { CSKYELFStreamer *S = new CSKYELFStreamer(Ctx, std::move(MAB), std::move(OW), std::move(Emitter)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); return S; } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index dc8328a6705da8..0a948402fb896a 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -385,8 +385,7 @@ createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { return 
createHexagonELFStreamer(T, Context, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index 8f83c883e822e0..4a381c033b384d 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -63,13 +63,12 @@ createLanaiMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { if (!T.isOSBinFormatELF()) llvm_unreachable("OS not supported"); return createELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/, diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp index a6e15e09463d26..9e56333e5fd9b5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp @@ -82,11 +82,9 @@ namespace llvm { MCELFStreamer *createLoongArchELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll) { + std::unique_ptr MCE) { LoongArchELFStreamer *S = new LoongArchELFStreamer( C, std::move(MAB), std::move(MOW), std::move(MCE)); - S->getAssembler().setRelaxAll(RelaxAll); return S; } } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h index 220b54092c72a1..e220729d8923e2 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h @@ -25,7 +25,6 @@ class 
LoongArchTargetELFStreamer : public LoongArchTargetStreamer { MCELFStreamer *createLoongArchELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll); + std::unique_ptr MCE); } // end namespace llvm #endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index a4e6a09863e6a6..e40981f5b5cd57 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -193,10 +193,9 @@ namespace { MCStreamer *createLoongArchELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&MOW, - std::unique_ptr &&MCE, - bool RelaxAll) { + std::unique_ptr &&MCE) { return createLoongArchELFStreamer(Context, std::move(MAB), std::move(MOW), - std::move(MCE), RelaxAll); + std::move(MCE)); } } // end namespace diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 9843b6144343e3..e907e8d8a70022 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -112,10 +112,11 @@ void MipsELFStreamer::EmitMipsOptionRecords() { I->EmitMipsOptionRecord(); } -MCELFStreamer *llvm::createMipsELFStreamer( - MCContext &Context, std::unique_ptr MAB, - std::unique_ptr OW, std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +llvm::createMipsELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { return new MipsELFStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index ac70e40d4dfe96..051806d2cfe8f5 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ 
b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -75,8 +75,7 @@ class MipsELFStreamer : public MCELFStreamer { MCELFStreamer *createMipsELFStreamer(MCContext &Context, std::unique_ptr MAB, std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); + std::unique_ptr Emitter); } // end namespace llvm #endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index a84ca8ccfb2d12..2722e34b3f6246 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -22,11 +22,10 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. -MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, - std::unique_ptr TAB, - std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll); +MCELFStreamer * +createMipsNaClELFStreamer(MCContext &Context, std::unique_ptr TAB, + std::unique_ptr OW, + std::unique_ptr Emitter); } #endif diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index d38b89f9a1f258..499cbd873e299a 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -104,15 +104,14 @@ static MCInstPrinter *createMipsMCInstPrinter(const Triple &T, static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, - bool RelaxAll) { + std::unique_ptr &&Emitter) { MCStreamer *S; if (!T.isOSNaCl()) S = createMipsELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); else S = createMipsNaClELFStreamer(Context, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); return S; } 
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 4ba0ae91e2f653..86194a9ebb61a2 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -259,15 +259,12 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { return Reg != Mips::SP && Reg != Mips::T8; } -MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, - std::unique_ptr TAB, - std::unique_ptr OW, - std::unique_ptr Emitter, - bool RelaxAll) { +MCELFStreamer * +createMipsNaClELFStreamer(MCContext &Context, std::unique_ptr TAB, + std::unique_ptr OW, + std::unique_ptr Emitter) { MipsNaClELFStreamer *S = new MipsNaClELFStreamer( Context, std::move(TAB), std::move(OW), std::move(Emitter)); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); // Set bundle-alignment as required by the NaCl ABI for the target. S->emitBundleAlignMode(MIPS_NACL_BUNDLE_ALIGN); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index b849b7be7b7be8..241078b038735b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -203,15 +203,16 @@ static MCStreamer * createPPCELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll) { + std::unique_ptr &&Emitter) { return createPPCELFStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)); } -static MCStreamer *createPPCXCOFFStreamer( - const Triple &T, MCContext &Context, std::unique_ptr &&MAB, - std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll) { +static MCStreamer * +createPPCXCOFFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter) { return createPPCXCOFFStreamer(Context, std::move(MAB), 
std::move(OW), std::move(Emitter)); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index cdf7c048a4bf11..ae7ce476fff222 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -197,11 +197,9 @@ namespace llvm { MCELFStreamer *createRISCVELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll) { + std::unique_ptr MCE) { RISCVELFStreamer *S = new RISCVELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)); - S->getAssembler().setRelaxAll(RelaxAll); return S; } } // namespace llvm diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index e8f29cd8449ba0..212d731889f1ae 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -75,7 +75,6 @@ class RISCVTargetELFStreamer : public RISCVTargetStreamer { MCELFStreamer *createRISCVELFStreamer(MCContext &C, std::unique_ptr MAB, std::unique_ptr MOW, - std::unique_ptr MCE, - bool RelaxAll); + std::unique_ptr MCE); } #endif diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 12a69842ab4c37..691a5892ae827b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -332,10 +332,9 @@ namespace { MCStreamer *createRISCVELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr &&MAB, std::unique_ptr &&MOW, - std::unique_ptr &&MCE, - bool RelaxAll) { + std::unique_ptr &&MCE) { return createRISCVELFStreamer(Context, std::move(MAB), std::move(MOW), - std::move(MCE), RelaxAll); + std::move(MCE)); } } // end anonymous namespace diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 0f92e9ed6a64d3..adb17cec28c26f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -75,6 +75,13 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, auto &ISAInfo = *ParseResult; emitTextAttribute(RISCVAttrs::ARCH, ISAInfo->toString()); } + + if (STI.hasFeature(RISCV::FeatureStdExtA)) { + unsigned AtomicABITag = STI.hasFeature(RISCV::FeatureNoTrailingSeqCstFence) + ? RISCVAttrs::RISCVAtomicAbiTag::AtomicABI::A6C + : RISCVAttrs::RISCVAtomicAbiTag::AtomicABI::A6S; + emitAttribute(RISCVAttrs::ATOMIC_ABI, AtomicABITag); + } } // This part is for ascii assembly output diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index c3dc4ea53697c0..deb983528f323c 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1216,10 +1216,10 @@ foreach i = {1-31} in def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore", "true", "Enable save/restore.">; -def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence", - "EnableSeqCstTrailingFence", - "true", - "Enable trailing fence for seq-cst store.">; +def FeatureNoTrailingSeqCstFence : SubtargetFeature<"no-trailing-seq-cst-fence", + "EnableTrailingSeqCstFence", + "false", + "Disable trailing fence for seq-cst store.">; def FeatureUnalignedScalarMem : SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6529ab7a84a133..769c465d56f984 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return SDValue(); uint64_t MulAmt = CNode->getZExtValue(); + // WARNING: The code below is 
knowingly incorrect with regards to undef semantics. + // We're adding additional uses of X here, and in principle, we should be freezing + // X before doing so. However, adding freeze here causes real regressions, and no + // other target properly freezes X in these cases either. + SDValue X = N->getOperand(0); + for (uint64_t Divisor : {3, 5, 9}) { if (MulAmt % Divisor != 0) continue; @@ -13428,7 +13434,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X) if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) { SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); @@ -13446,7 +13451,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (ScaleShift >= 1 && ScaleShift < 4) { unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, @@ -13466,7 +13470,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, unsigned TZ = llvm::countr_zero(C); if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) { SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X); @@ -13481,7 +13484,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (ScaleShift >= 1 && ScaleShift < 4) { unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2))); SDLoc DL(N); - SDValue X = DAG.getFreeze(N->getOperand(0)); SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, Shift1, @@ -13495,11 +13497,11 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (isPowerOf2_64(MulAmt + Offset)) { SDLoc DL(N); SDValue Shift1 = - 
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT)); - SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, N->getOperand(0), + SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X, DAG.getConstant(Log2_64(Offset - 1), DL, VT), - N->getOperand(0)); + X); return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359); } } @@ -20190,7 +20192,7 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder, if (isa(Inst) && isAcquireOrStronger(Ord)) return Builder.CreateFence(AtomicOrdering::Acquire); - if (Subtarget.enableSeqCstTrailingFence() && isa(Inst) && + if (Subtarget.enableTrailingSeqCstFence() && isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); return nullptr; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 3d598dd6f708ef..3feb7ec347543c 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -262,6 +262,17 @@ struct DemandedFields { VLZeroness = true; } + // Make this the result of demanding both the fields in this and B. 
+ void doUnion(const DemandedFields &B) { + VLAny |= B.VLAny; + VLZeroness |= B.VLZeroness; + SEW = std::max(SEW, B.SEW); + LMUL |= B.LMUL; + SEWLMULRatio |= B.SEWLMULRatio; + TailPolicy |= B.TailPolicy; + MaskPolicy |= B.MaskPolicy; + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Support for debugging, callable in GDB: V->dump() LLVM_DUMP_METHOD void dump() const { @@ -1326,11 +1337,7 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, PHIOp += 2) { Register InReg = PHI->getOperand(PHIOp).getReg(); MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB(); - const BlockData &PBBInfo = BlockInfo[PBB->getNumber()]; - // If the exit from the predecessor has the VTYPE we are looking for - // we might be able to avoid a VSETVLI. - if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require)) - return true; + const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit; // We need the PHI input to the be the output of a VSET(I)VLI. MachineInstr *DefMI = MRI->getVRegDef(InReg); @@ -1340,8 +1347,13 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, // We found a VSET(I)VLI make sure it matches the output of the // predecessor block. VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (!DefInfo.hasSameAVL(PBBInfo.Exit) || - !DefInfo.hasSameVTYPE(PBBInfo.Exit)) + if (DefInfo != PBBExit) + return true; + + // Require has the same VL as PBBExit, so if the exit from the + // predecessor has the VTYPE we are looking for we might be able + // to avoid a VSETVLI. 
+ if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require)) return true; } @@ -1547,16 +1559,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { AvailableInfo, OldExit); } -static void doUnion(DemandedFields &A, DemandedFields B) { - A.VLAny |= B.VLAny; - A.VLZeroness |= B.VLZeroness; - A.SEW = std::max(A.SEW, B.SEW); - A.LMUL |= B.LMUL; - A.SEWLMULRatio |= B.SEWLMULRatio; - A.TailPolicy |= B.TailPolicy; - A.MaskPolicy |= B.MaskPolicy; -} - // Return true if we can mutate PrevMI to match MI without changing any the // fields which would be observed. static bool canMutatePriorConfig(const MachineInstr &PrevMI, @@ -1606,7 +1608,7 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) { for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { if (!isVectorConfigInstr(MI)) { - doUnion(Used, getDemanded(MI, MRI, ST)); + Used.doUnion(getDemanded(MI, MRI, ST)); if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) || MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr)) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 5c1f154efa9911..3efd09aeae879d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1633,8 +1633,230 @@ static bool isFMUL(unsigned Opc) { } } +bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const { +#define OPCODE_LMUL_CASE(OPC) \ + case RISCV::OPC##_M1: \ + case RISCV::OPC##_M2: \ + case RISCV::OPC##_M4: \ + case RISCV::OPC##_M8: \ + case RISCV::OPC##_MF2: \ + case RISCV::OPC##_MF4: \ + case RISCV::OPC##_MF8 + +#define OPCODE_LMUL_MASK_CASE(OPC) \ + case RISCV::OPC##_M1_MASK: \ + case RISCV::OPC##_M2_MASK: \ + case RISCV::OPC##_M4_MASK: \ + case RISCV::OPC##_M8_MASK: \ + case RISCV::OPC##_MF2_MASK: \ + case RISCV::OPC##_MF4_MASK: \ + case RISCV::OPC##_MF8_MASK + + unsigned Opcode = Inst.getOpcode(); + if (Invert) { + if (auto InvOpcode = 
getInverseOpcode(Opcode)) + Opcode = *InvOpcode; + else + return false; + } + + // clang-format off + switch (Opcode) { + default: + return false; + OPCODE_LMUL_CASE(PseudoVADD_VV): + OPCODE_LMUL_MASK_CASE(PseudoVADD_VV): + OPCODE_LMUL_CASE(PseudoVMUL_VV): + OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV): + return true; + } + // clang-format on + +#undef OPCODE_LMUL_MASK_CASE +#undef OPCODE_LMUL_CASE +} + +bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root, + const MachineInstr &Prev) const { + if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode())) + return false; + + assert(Root.getMF() == Prev.getMF()); + const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo(); + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + + // Make sure vtype operands are also the same. + const MCInstrDesc &Desc = get(Root.getOpcode()); + const uint64_t TSFlags = Desc.TSFlags; + + auto checkImmOperand = [&](unsigned OpIdx) { + return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm(); + }; + + auto checkRegOperand = [&](unsigned OpIdx) { + return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg(); + }; + + // PassThru + // TODO: Potentially we can loosen the condition to consider Root to be + // associable with Prev if Root has NoReg as passthru. In which case we + // also need to loosen the condition on vector policies between these. 
+ if (!checkRegOperand(1)) + return false; + + // SEW + if (RISCVII::hasSEWOp(TSFlags) && + !checkImmOperand(RISCVII::getSEWOpNum(Desc))) + return false; + + // Mask + if (RISCVII::usesMaskPolicy(TSFlags)) { + const MachineBasicBlock *MBB = Root.getParent(); + const MachineBasicBlock::const_reverse_iterator It1(&Root); + const MachineBasicBlock::const_reverse_iterator It2(&Prev); + Register MI1VReg; + + bool SeenMI2 = false; + for (auto End = MBB->rend(), It = It1; It != End; ++It) { + if (It == It2) { + SeenMI2 = true; + if (!MI1VReg.isValid()) + // There is no V0 def between Root and Prev; they're sharing the + // same V0. + break; + } + + if (It->modifiesRegister(RISCV::V0, TRI)) { + Register SrcReg = It->getOperand(1).getReg(); + // If it's not VReg it'll be more difficult to track its defs, so + // bailing out here just to be safe. + if (!SrcReg.isVirtual()) + return false; + + if (!MI1VReg.isValid()) { + // This is the V0 def for Root. + MI1VReg = SrcReg; + continue; + } + + // Some random mask updates. + if (!SeenMI2) + continue; + + // This is the V0 def for Prev; check if it's the same as that of + // Root. + if (MI1VReg != SrcReg) + return false; + else + break; + } + } + + // If we haven't encountered Prev, it's likely that this function was + // called in a wrong way (e.g. Root is before Prev). 
+ assert(SeenMI2 && "Prev is expected to appear before Root"); + } + + // Tail / Mask policies + if (RISCVII::hasVecPolicyOp(TSFlags) && + !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc))) + return false; + + // VL + if (RISCVII::hasVLOp(TSFlags)) { + unsigned OpIdx = RISCVII::getVLOpNum(Desc); + const MachineOperand &Op1 = Root.getOperand(OpIdx); + const MachineOperand &Op2 = Prev.getOperand(OpIdx); + if (Op1.getType() != Op2.getType()) + return false; + switch (Op1.getType()) { + case MachineOperand::MO_Register: + if (Op1.getReg() != Op2.getReg()) + return false; + break; + case MachineOperand::MO_Immediate: + if (Op1.getImm() != Op2.getImm()) + return false; + break; + default: + llvm_unreachable("Unrecognized VL operand type"); + } + } + + // Rounding modes + if (RISCVII::hasRoundModeOp(TSFlags) && + !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1)) + return false; + + return true; +} + +// Most of our RVV pseudos have passthru operand, so the real operands +// start from index = 2. +bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) && + "Expect the present of passthrough operand."); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg()); + + // If only one operand has the same or inverse opcode and it's the second + // source operand, the operands must be commuted. 
+ Commuted = !areRVVInstsReassociable(Inst, *MI1) && + areRVVInstsReassociable(Inst, *MI2); + if (Commuted) + std::swap(MI1, MI2); + + return areRVVInstsReassociable(Inst, *MI1) && + (isVectorAssociativeAndCommutative(*MI1) || + isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) && + hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); +} + +bool RISCVInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + if (!isVectorAssociativeAndCommutative(Inst) && + !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) + return TargetInstrInfo::hasReassociableOperands(Inst, MBB); + + const MachineOperand &Op1 = Inst.getOperand(2); + const MachineOperand &Op2 = Inst.getOperand(3); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && Op1.getReg().isVirtual()) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && Op2.getReg().isVirtual()) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And at least one operand must be defined in MBB. + return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB); +} + +void RISCVInstrInfo::getReassociateOperandIndices( + const MachineInstr &Root, unsigned Pattern, + std::array &OperandIndices) const { + TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices); + if (RISCV::getRVVMCOpcode(Root.getOpcode())) { + // Skip the passthrough operand, so increment all indices by one. 
+ for (unsigned I = 0; I < 5; ++I) + ++OperandIndices[I]; + } +} + bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const { + if (isVectorAssociativeAndCommutative(Inst) || + isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) + return hasReassociableVectorSibling(Inst, Commuted); + if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted)) return false; @@ -1654,6 +1876,9 @@ bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const { + if (isVectorAssociativeAndCommutative(Inst, Invert)) + return true; + unsigned Opc = Inst.getOpcode(); if (Invert) { auto InverseOpcode = getInverseOpcode(Opc); @@ -1706,6 +1931,38 @@ bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, std::optional RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { +#define RVV_OPC_LMUL_CASE(OPC, INV) \ + case RISCV::OPC##_M1: \ + return RISCV::INV##_M1; \ + case RISCV::OPC##_M2: \ + return RISCV::INV##_M2; \ + case RISCV::OPC##_M4: \ + return RISCV::INV##_M4; \ + case RISCV::OPC##_M8: \ + return RISCV::INV##_M8; \ + case RISCV::OPC##_MF2: \ + return RISCV::INV##_MF2; \ + case RISCV::OPC##_MF4: \ + return RISCV::INV##_MF4; \ + case RISCV::OPC##_MF8: \ + return RISCV::INV##_MF8 + +#define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \ + case RISCV::OPC##_M1_MASK: \ + return RISCV::INV##_M1_MASK; \ + case RISCV::OPC##_M2_MASK: \ + return RISCV::INV##_M2_MASK; \ + case RISCV::OPC##_M4_MASK: \ + return RISCV::INV##_M4_MASK; \ + case RISCV::OPC##_M8_MASK: \ + return RISCV::INV##_M8_MASK; \ + case RISCV::OPC##_MF2_MASK: \ + return RISCV::INV##_MF2_MASK; \ + case RISCV::OPC##_MF4_MASK: \ + return RISCV::INV##_MF4_MASK; \ + case RISCV::OPC##_MF8_MASK: \ + return RISCV::INV##_MF8_MASK + switch (Opcode) { default: return std::nullopt; @@ -1729,7 +1986,16 @@ RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { return RISCV::SUBW; case 
RISCV::SUBW: return RISCV::ADDW; + // clang-format off + RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV); + RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV); + RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV); + RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV); + // clang-format on } + +#undef RVV_OPC_LMUL_MASK_CASE +#undef RVV_OPC_LMUL_CASE } static bool canCombineFPFusedMultiply(const MachineInstr &Root, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 3b03d5efde6ef5..170f813eb10d7d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -266,6 +266,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const override; + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const override; @@ -274,6 +277,10 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { std::optional getInverseOpcode(unsigned Opcode) const override; + void getReassociateOperandIndices( + const MachineInstr &Root, unsigned Pattern, + std::array &OperandIndices) const override; + ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; @@ -297,6 +304,13 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { private: unsigned getInstBundleLength(const MachineInstr &MI) const; + + bool isVectorAssociativeAndCommutative(const MachineInstr &MI, + bool Invert = false) const; + bool areRVVInstsReassociable(const MachineInstr &MI1, + const MachineInstr &MI2) const; + bool hasReassociableVectorSibling(const MachineInstr &Inst, + bool &Commuted) const; }; namespace RISCV { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index aac7dc444a2de3..aaf9c019aedfe8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ 
-1,4 +1,4 @@ -//===-- RISCVInstrInfoZvk.td - RISC-V 'Zvk' instructions -------*- tablegen -*-===// +//===-- RISCVInstrInfoZvk.td - RISC-V 'Zvk' instructions ---*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp index 78dfbf4ec9327a..74ebaa9d0c0047 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCTargetDesc.cpp @@ -53,9 +53,9 @@ static MCStreamer * createSPIRVMCStreamer(const Triple &T, MCContext &Ctx, std::unique_ptr &&MAB, std::unique_ptr &&OW, - std::unique_ptr &&Emitter, bool RelaxAll) { + std::unique_ptr &&Emitter) { return createSPIRVStreamer(Ctx, std::move(MAB), std::move(OW), - std::move(Emitter), RelaxAll); + std::move(Emitter)); } static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 115f34fa7751da..2da4431cf077eb 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -9631,7 +9631,7 @@ SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, case 8: case 16: Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero); - LLVM_FALLTHROUGH; + [[fallthrough]]; case 32: case 64: Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op, diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index b7b5b2a97c59e2..8ea02bd2ad1ff0 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -18,24 +18,16 @@ using namespace llvm; std::optional 
WebAssembly::parseType(StringRef Type) { - // FIXME: can't use StringSwitch because wasm::ValType doesn't have a - // "invalid" value. - if (Type == "i32") - return wasm::ValType::I32; - if (Type == "i64") - return wasm::ValType::I64; - if (Type == "f32") - return wasm::ValType::F32; - if (Type == "f64") - return wasm::ValType::F64; - if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || Type == "i32x4" || - Type == "i64x2" || Type == "f32x4" || Type == "f64x2") - return wasm::ValType::V128; - if (Type == "funcref") - return wasm::ValType::FUNCREF; - if (Type == "externref") - return wasm::ValType::EXTERNREF; - return std::nullopt; + return llvm::StringSwitch>{Type} + .Case("i32", wasm::ValType::I32) + .Case("i64", wasm::ValType::I64) + .Case("f32", wasm::ValType::F32) + .Case("f64", wasm::ValType::F64) + .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2", + wasm::ValType::V128) + .Case("funcref", wasm::ValType::FUNCREF) + .Case("externref", wasm::ValType::EXTERNREF) + .Default(std::nullopt); } WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 437a7bd6ff6c4c..18ecca34943f6b 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -121,7 +121,6 @@ MCStreamer *createX86WinCOFFStreamer(MCContext &C, std::unique_ptr &&AB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an X86 Mach-O object writer. 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 36945d1f67468f..dac8bc1fb1be35 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -70,11 +70,9 @@ MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, std::unique_ptr &&AB, std::unique_ptr &&OW, std::unique_ptr &&CE, - bool RelaxAll, bool IncrementalLinkerCompatible) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), std::move(OW)); - S->getAssembler().setRelaxAll(RelaxAll); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; } diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 39cb3f2c2fe178..ea0b56b9a1339b 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -47,204 +47,8 @@ static const char *RISCVGImplications[] = { "i", "m", "a", "f", "d", "zicsr", "zifencei" }; -// NOTE: This table should be sorted alphabetically by extension name. 
-static const RISCVSupportedExtension SupportedExtensions[] = { - {"a", {2, 1}}, - {"c", {2, 0}}, - {"d", {2, 2}}, - {"e", {2, 0}}, - {"f", {2, 2}}, - {"h", {1, 0}}, - {"i", {2, 1}}, - {"m", {2, 0}}, - - {"shcounterenw", {1, 0}}, - {"shgatpa", {1, 0}}, - {"shtvala", {1, 0}}, - {"shvsatpa", {1, 0}}, - {"shvstvala", {1, 0}}, - {"shvstvecd", {1, 0}}, - {"smaia", {1, 0}}, - {"smepmp", {1, 0}}, - {"ssaia", {1, 0}}, - {"ssccptr", {1, 0}}, - {"sscofpmf", {1, 0}}, - {"sscounterenw", {1, 0}}, - {"ssstateen", {1, 0}}, - {"ssstrict", {1, 0}}, - {"sstc", {1, 0}}, - {"sstvala", {1, 0}}, - {"sstvecd", {1, 0}}, - {"ssu64xl", {1, 0}}, - {"svade", {1, 0}}, - {"svadu", {1, 0}}, - {"svbare", {1, 0}}, - {"svinval", {1, 0}}, - {"svnapot", {1, 0}}, - {"svpbmt", {1, 0}}, - - {"v", {1, 0}}, - - // vendor-defined ('X') extensions - {"xcvalu", {1, 0}}, - {"xcvbi", {1, 0}}, - {"xcvbitmanip", {1, 0}}, - {"xcvelw", {1, 0}}, - {"xcvmac", {1, 0}}, - {"xcvmem", {1, 0}}, - {"xcvsimd", {1, 0}}, - {"xsfcease", {1, 0}}, - {"xsfvcp", {1, 0}}, - {"xsfvfnrclipxfqf", {1, 0}}, - {"xsfvfwmaccqqq", {1, 0}}, - {"xsfvqmaccdod", {1, 0}}, - {"xsfvqmaccqoq", {1, 0}}, - {"xsifivecdiscarddlone", {1, 0}}, - {"xsifivecflushdlone", {1, 0}}, - {"xtheadba", {1, 0}}, - {"xtheadbb", {1, 0}}, - {"xtheadbs", {1, 0}}, - {"xtheadcmo", {1, 0}}, - {"xtheadcondmov", {1, 0}}, - {"xtheadfmemidx", {1, 0}}, - {"xtheadmac", {1, 0}}, - {"xtheadmemidx", {1, 0}}, - {"xtheadmempair", {1, 0}}, - {"xtheadsync", {1, 0}}, - {"xtheadvdot", {1, 0}}, - {"xventanacondops", {1, 0}}, - - {"za128rs", {1, 0}}, - {"za64rs", {1, 0}}, - {"zacas", {1, 0}}, - {"zama16b", {1, 0}}, - {"zawrs", {1, 0}}, - - {"zba", {1, 0}}, - {"zbb", {1, 0}}, - {"zbc", {1, 0}}, - {"zbkb", {1, 0}}, - {"zbkc", {1, 0}}, - {"zbkx", {1, 0}}, - {"zbs", {1, 0}}, - - {"zca", {1, 0}}, - {"zcb", {1, 0}}, - {"zcd", {1, 0}}, - {"zce", {1, 0}}, - {"zcf", {1, 0}}, - {"zcmop", {1, 0}}, - {"zcmp", {1, 0}}, - {"zcmt", {1, 0}}, - - {"zdinx", {1, 0}}, - - {"zfa", {1, 0}}, - {"zfh", {1, 0}}, 
- {"zfhmin", {1, 0}}, - {"zfinx", {1, 0}}, - - {"zhinx", {1, 0}}, - {"zhinxmin", {1, 0}}, - - {"zic64b", {1, 0}}, - {"zicbom", {1, 0}}, - {"zicbop", {1, 0}}, - {"zicboz", {1, 0}}, - {"ziccamoa", {1, 0}}, - {"ziccif", {1, 0}}, - {"zicclsm", {1, 0}}, - {"ziccrse", {1, 0}}, - {"zicntr", {2, 0}}, - {"zicond", {1, 0}}, - {"zicsr", {2, 0}}, - {"zifencei", {2, 0}}, - {"zihintntl", {1, 0}}, - {"zihintpause", {2, 0}}, - {"zihpm", {2, 0}}, - {"zimop", {1, 0}}, - - {"zk", {1, 0}}, - {"zkn", {1, 0}}, - {"zknd", {1, 0}}, - {"zkne", {1, 0}}, - {"zknh", {1, 0}}, - {"zkr", {1, 0}}, - {"zks", {1, 0}}, - {"zksed", {1, 0}}, - {"zksh", {1, 0}}, - {"zkt", {1, 0}}, - - {"zmmul", {1, 0}}, - - {"zvbb", {1, 0}}, - {"zvbc", {1, 0}}, - - {"zve32f", {1, 0}}, - {"zve32x", {1, 0}}, - {"zve64d", {1, 0}}, - {"zve64f", {1, 0}}, - {"zve64x", {1, 0}}, - - {"zvfh", {1, 0}}, - {"zvfhmin", {1, 0}}, - - // vector crypto - {"zvkb", {1, 0}}, - {"zvkg", {1, 0}}, - {"zvkn", {1, 0}}, - {"zvknc", {1, 0}}, - {"zvkned", {1, 0}}, - {"zvkng", {1, 0}}, - {"zvknha", {1, 0}}, - {"zvknhb", {1, 0}}, - {"zvks", {1, 0}}, - {"zvksc", {1, 0}}, - {"zvksed", {1, 0}}, - {"zvksg", {1, 0}}, - {"zvksh", {1, 0}}, - {"zvkt", {1, 0}}, - - {"zvl1024b", {1, 0}}, - {"zvl128b", {1, 0}}, - {"zvl16384b", {1, 0}}, - {"zvl2048b", {1, 0}}, - {"zvl256b", {1, 0}}, - {"zvl32768b", {1, 0}}, - {"zvl32b", {1, 0}}, - {"zvl4096b", {1, 0}}, - {"zvl512b", {1, 0}}, - {"zvl64b", {1, 0}}, - {"zvl65536b", {1, 0}}, - {"zvl8192b", {1, 0}}, -}; - -// NOTE: This table should be sorted alphabetically by extension name. 
-// clang-format off -static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { - {"smmpm", {0, 8}}, - {"smnpm", {0, 8}}, - {"ssnpm", {0, 8}}, - {"sspm", {0, 8}}, - {"ssqosid", {1, 0}}, - {"supm", {0, 8}}, - - {"zaamo", {0, 2}}, - {"zabha", {1, 0}}, - {"zalasr", {0, 1}}, - {"zalrsc", {0, 2}}, - - {"zfbfmin", {1, 0}}, - - {"zicfilp", {0, 4}}, - {"zicfiss", {0, 4}}, - - {"ztso", {0, 1}}, - - {"zvfbfmin", {1, 0}}, - {"zvfbfwma", {1, 0}}, -}; -// clang-format on +#define GET_SUPPORTED_EXTENSIONS +#include "llvm/TargetParser/RISCVTargetParserDef.inc" static constexpr RISCVProfile SupportedProfiles[] = { {"rvi20u32", "rv32i"}, @@ -1041,66 +845,6 @@ Error RISCVISAInfo::checkDependency() { return Error::success(); } -static const char *ImpliedExtsD[] = {"f"}; -static const char *ImpliedExtsF[] = {"zicsr"}; -static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"}; -static const char *ImpliedExtsXSfvcp[] = {"zve32x"}; -static const char *ImpliedExtsXSfvfnrclipxfqf[] = {"zve32f"}; -static const char *ImpliedExtsXSfvfwmaccqqq[] = {"zvfbfmin"}; -static const char *ImpliedExtsXSfvqmaccdod[] = {"zve32x"}; -static const char *ImpliedExtsXSfvqmaccqoq[] = {"zve32x"}; -static const char *ImpliedExtsXTHeadVdot[] = {"v"}; -static const char *ImpliedExtsZcb[] = {"zca"}; -static const char *ImpliedExtsZcd[] = {"d", "zca"}; -static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; -static const char *ImpliedExtsZcf[] = {"f", "zca"}; -static const char *ImpliedExtsZcmop[] = {"zca"}; -static const char *ImpliedExtsZcmp[] = {"zca"}; -static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"}; -static const char *ImpliedExtsZdinx[] = {"zfinx"}; -static const char *ImpliedExtsZfa[] = {"f"}; -static const char *ImpliedExtsZfbfmin[] = {"f"}; -static const char *ImpliedExtsZfh[] = {"zfhmin"}; -static const char *ImpliedExtsZfhmin[] = {"f"}; -static const char *ImpliedExtsZfinx[] = {"zicsr"}; -static const char *ImpliedExtsZhinx[] = {"zhinxmin"}; -static const char 
*ImpliedExtsZhinxmin[] = {"zfinx"}; -static const char *ImpliedExtsZicfiss[] = {"zicsr", "zimop"}; -static const char *ImpliedExtsZicntr[] = {"zicsr"}; -static const char *ImpliedExtsZihpm[] = {"zicsr"}; -static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; -static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", - "zkne", "zknd", "zknh"}; -static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; -static const char *ImpliedExtsZvbb[] = {"zvkb"}; -static const char *ImpliedExtsZve32f[] = {"zve32x", "f"}; -static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"}; -static const char *ImpliedExtsZve64d[] = {"zve64f", "d"}; -static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; -static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"}; -static const char *ImpliedExtsZvfbfmin[] = {"zve32f"}; -static const char *ImpliedExtsZvfbfwma[] = {"zvfbfmin", "zfbfmin"}; -static const char *ImpliedExtsZvfh[] = {"zvfhmin", "zfhmin"}; -static const char *ImpliedExtsZvfhmin[] = {"zve32f"}; -static const char *ImpliedExtsZvkn[] = {"zvkb", "zvkned", "zvknhb", "zvkt"}; -static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"}; -static const char *ImpliedExtsZvkng[] = {"zvkg", "zvkn"}; -static const char *ImpliedExtsZvknhb[] = {"zve64x"}; -static const char *ImpliedExtsZvks[] = {"zvkb", "zvksed", "zvksh", "zvkt"}; -static const char *ImpliedExtsZvksc[] = {"zvbc", "zvks"}; -static const char *ImpliedExtsZvksg[] = {"zvkg", "zvks"}; -static const char *ImpliedExtsZvl1024b[] = {"zvl512b"}; -static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; -static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"}; -static const char *ImpliedExtsZvl2048b[] = {"zvl1024b"}; -static const char *ImpliedExtsZvl256b[] = {"zvl128b"}; -static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"}; -static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"}; -static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; -static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; 
-static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; -static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; - struct ImpliedExtsEntry { StringLiteral Name; ArrayRef Exts; @@ -1112,67 +856,8 @@ struct ImpliedExtsEntry { bool operator<(StringRef Other) const { return Name < Other; } }; -// Note: The table needs to be sorted by name. -static constexpr ImpliedExtsEntry ImpliedExts[] = { - {{"d"}, {ImpliedExtsD}}, - {{"f"}, {ImpliedExtsF}}, - {{"v"}, {ImpliedExtsV}}, - {{"xsfvcp"}, {ImpliedExtsXSfvcp}}, - {{"xsfvfnrclipxfqf"}, {ImpliedExtsXSfvfnrclipxfqf}}, - {{"xsfvfwmaccqqq"}, {ImpliedExtsXSfvfwmaccqqq}}, - {{"xsfvqmaccdod"}, {ImpliedExtsXSfvqmaccdod}}, - {{"xsfvqmaccqoq"}, {ImpliedExtsXSfvqmaccqoq}}, - {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}}, - {{"zcb"}, {ImpliedExtsZcb}}, - {{"zcd"}, {ImpliedExtsZcd}}, - {{"zce"}, {ImpliedExtsZce}}, - {{"zcf"}, {ImpliedExtsZcf}}, - {{"zcmop"}, {ImpliedExtsZcmop}}, - {{"zcmp"}, {ImpliedExtsZcmp}}, - {{"zcmt"}, {ImpliedExtsZcmt}}, - {{"zdinx"}, {ImpliedExtsZdinx}}, - {{"zfa"}, {ImpliedExtsZfa}}, - {{"zfbfmin"}, {ImpliedExtsZfbfmin}}, - {{"zfh"}, {ImpliedExtsZfh}}, - {{"zfhmin"}, {ImpliedExtsZfhmin}}, - {{"zfinx"}, {ImpliedExtsZfinx}}, - {{"zhinx"}, {ImpliedExtsZhinx}}, - {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, - {{"zicfiss"}, {ImpliedExtsZicfiss}}, - {{"zicntr"}, {ImpliedExtsZicntr}}, - {{"zihpm"}, {ImpliedExtsZihpm}}, - {{"zk"}, {ImpliedExtsZk}}, - {{"zkn"}, {ImpliedExtsZkn}}, - {{"zks"}, {ImpliedExtsZks}}, - {{"zvbb"}, {ImpliedExtsZvbb}}, - {{"zve32f"}, {ImpliedExtsZve32f}}, - {{"zve32x"}, {ImpliedExtsZve32x}}, - {{"zve64d"}, {ImpliedExtsZve64d}}, - {{"zve64f"}, {ImpliedExtsZve64f}}, - {{"zve64x"}, {ImpliedExtsZve64x}}, - {{"zvfbfmin"}, {ImpliedExtsZvfbfmin}}, - {{"zvfbfwma"}, {ImpliedExtsZvfbfwma}}, - {{"zvfh"}, {ImpliedExtsZvfh}}, - {{"zvfhmin"}, {ImpliedExtsZvfhmin}}, - {{"zvkn"}, {ImpliedExtsZvkn}}, - {{"zvknc"}, {ImpliedExtsZvknc}}, - {{"zvkng"}, {ImpliedExtsZvkng}}, - {{"zvknhb"}, {ImpliedExtsZvknhb}}, - {{"zvks"}, 
{ImpliedExtsZvks}}, - {{"zvksc"}, {ImpliedExtsZvksc}}, - {{"zvksg"}, {ImpliedExtsZvksg}}, - {{"zvl1024b"}, {ImpliedExtsZvl1024b}}, - {{"zvl128b"}, {ImpliedExtsZvl128b}}, - {{"zvl16384b"}, {ImpliedExtsZvl16384b}}, - {{"zvl2048b"}, {ImpliedExtsZvl2048b}}, - {{"zvl256b"}, {ImpliedExtsZvl256b}}, - {{"zvl32768b"}, {ImpliedExtsZvl32768b}}, - {{"zvl4096b"}, {ImpliedExtsZvl4096b}}, - {{"zvl512b"}, {ImpliedExtsZvl512b}}, - {{"zvl64b"}, {ImpliedExtsZvl64b}}, - {{"zvl65536b"}, {ImpliedExtsZvl65536b}}, - {{"zvl8192b"}, {ImpliedExtsZvl8192b}}, -}; +#define GET_IMPLIED_EXTENSIONS +#include "llvm/TargetParser/RISCVTargetParserDef.inc" void RISCVISAInfo::updateImplication() { bool HasE = Exts.count("e") != 0; diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index f8920541e6fd64..e591a8e73b1c6f 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -281,32 +281,21 @@ static bool runIPSCCP( Function *F = I.first; const ValueLatticeElement &ReturnValue = I.second; - // If there is a known constant range for the return value, add !range - // metadata to the function's call sites. + // If there is a known constant range for the return value, add range + // attribute to the return value. if (ReturnValue.isConstantRange() && !ReturnValue.getConstantRange().isSingleElement()) { // Do not add range metadata if the return value may include undef. if (ReturnValue.isConstantRangeIncludingUndef()) continue; + // Do not touch existing attribute for now. + // TODO: We should be able to take the intersection of the existing + // attribute and the inferred range. + if (F->hasRetAttribute(Attribute::Range)) + continue; auto &CR = ReturnValue.getConstantRange(); - for (User *User : F->users()) { - auto *CB = dyn_cast(User); - if (!CB || CB->getCalledFunction() != F) - continue; - - // Do not touch existing metadata for now. - // TODO: We should be able to take the intersection of the existing - // metadata and the inferred range. 
- if (CB->getMetadata(LLVMContext::MD_range)) - continue; - - LLVMContext &Context = CB->getParent()->getContext(); - Metadata *RangeMD[] = { - ConstantAsMetadata::get(ConstantInt::get(Context, CR.getLower())), - ConstantAsMetadata::get(ConstantInt::get(Context, CR.getUpper()))}; - CB->setMetadata(LLVMContext::MD_range, MDNode::get(Context, RangeMD)); - } + F->addRangeRetAttr(CR); continue; } if (F->getReturnType()->isVoidTy()) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 88b7e496897e1f..51ac77348ed9e3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2001,43 +2001,30 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, if (!GEP1) return nullptr; - if (GEP2) { - // (gep X, ...) - (gep X, ...) - // - // Avoid duplicating the arithmetic if there are more than one non-constant - // indices between the two GEPs and either GEP has a non-constant index and - // multiple users. If zero non-constant index, the result is a constant and - // there is no duplication. If one non-constant index, the result is an add - // or sub with a constant, which is no larger than the original code, and - // there's no duplicated arithmetic, even if either GEP has multiple - // users. If more than one non-constant indices combined, as long as the GEP - // with at least one non-constant index doesn't have multiple users, there - // is no duplication. - unsigned NumNonConstantIndices1 = GEP1->countNonConstantIndices(); - unsigned NumNonConstantIndices2 = GEP2->countNonConstantIndices(); - if (NumNonConstantIndices1 + NumNonConstantIndices2 > 1 && - ((NumNonConstantIndices1 > 0 && !GEP1->hasOneUse()) || - (NumNonConstantIndices2 > 0 && !GEP2->hasOneUse()))) { - return nullptr; - } - } + // To avoid duplicating the offset arithmetic, rewrite the GEP to use the + // computed offset. 
This may erase the original GEP, so be sure to cache the + // inbounds flag before emitting the offset. + // TODO: We should probably do this even if there is only one GEP. + bool RewriteGEPs = GEP2 != nullptr; // Emit the offset of the GEP and an intptr_t. - Value *Result = EmitGEPOffset(GEP1); + bool GEP1IsInBounds = GEP1->isInBounds(); + Value *Result = EmitGEPOffset(GEP1, RewriteGEPs); // If this is a single inbounds GEP and the original sub was nuw, // then the final multiplication is also nuw. if (auto *I = dyn_cast(Result)) - if (IsNUW && !GEP2 && !Swapped && GEP1->isInBounds() && + if (IsNUW && !GEP2 && !Swapped && GEP1IsInBounds && I->getOpcode() == Instruction::Mul) I->setHasNoUnsignedWrap(); // If we have a 2nd GEP of the same base pointer, subtract the offsets. // If both GEPs are inbounds, then the subtract does not have signed overflow. if (GEP2) { - Value *Offset = EmitGEPOffset(GEP2); + bool GEP2IsInBounds = GEP2->isInBounds(); + Value *Offset = EmitGEPOffset(GEP2, RewriteGEPs); Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false, - GEP1->isInBounds() && GEP2->isInBounds()); + GEP1IsInBounds && GEP2IsInBounds); } // If we have p - gep(p, ...) then we have to negate the result. @@ -2781,6 +2768,16 @@ Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) { propagateSelectFMF(NewSel, P == X); return NewSel; } + + // -(Cond ? X : C) --> Cond ? -X : -C + // -(Cond ? C : Y) --> Cond ? 
-C : -Y + if (match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) { + Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg"); + Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg"); + SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY); + propagateSelectFMF(NewSel, /*CommonOperand=*/true); + return NewSel; + } } // fneg (copysign x, y) -> copysign x, (fneg y) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 282badd4369330..58b2d8e9dec1c3 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2339,6 +2339,43 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP, return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel); } +// Canonicalization: +// gep T, (gep i8, base, C1), (Index + C2) into +// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index +static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, + GEPOperator *Src, + InstCombinerImpl &IC) { + if (GEP.getNumIndices() != 1) + return nullptr; + auto &DL = IC.getDataLayout(); + Value *Base; + const APInt *C1; + if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1)))) + return nullptr; + Value *VarIndex; + const APInt *C2; + Type *PtrTy = Src->getType()->getScalarType(); + unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy); + if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2)))) + return nullptr; + if (C1->getBitWidth() != IndexSizeInBits || + C2->getBitWidth() != IndexSizeInBits) + return nullptr; + Type *BaseType = GEP.getSourceElementType(); + if (isa(BaseType)) + return nullptr; + APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType)); + APInt NewOffset = TypeSize * *C2 + *C1; + if (NewOffset.isZero() || + (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) { + Value *GEPConst = + IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset)); + 
return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src) { // Combine Indices - If the source pointer to this getelementptr instruction @@ -2347,6 +2384,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; + if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this)) + return I; + // For constant GEPs, use a more general offset-based folding approach. Type *PtrTy = Src->getType()->getScalarType(); if (GEP.hasAllConstantIndices() && diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e5ef0333696d0f..66ee2fce8313e8 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2506,6 +2506,8 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy, bool Signed = false) { Type *srcTy = V->getType(); + if (srcTy == dstTy) + return V; size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); if (srcSizeInBits > 1 && dstSizeInBits == 1) diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index edddfb1b92402f..059900f357e64b 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate { // We don't need to check non-wrapping here because forward/backward // dependence wouldn't be valid if these weren't monotonic accesses. 
- auto *Dist = cast( + auto *Dist = dyn_cast( PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); + if (!Dist) + return false; const APInt &Val = Dist->getAPInt(); return Val == TypeByteSize * StrideLoad; } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3eda669eb8a726..4db72461c95e47 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7524,6 +7524,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu SI->getPointerAddressSpace())) && SI->getPointerOperand() == I; + // llvm.assume(false/undef) always triggers immediate UB. + if (auto *Assume = dyn_cast(Use)) { + // Ignore assume operand bundles. + if (I == Assume->getArgOperand(0)) + return true; + } + if (auto *CB = dyn_cast(Use)) { if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) return false; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a1a28076881cb5..0cd7bd77722260 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15072,11 +15072,16 @@ void BoUpSLP::computeMinimumValueSizes() { IsSignedCmp = NodeIdx < VectorizableTree.size() && any_of(VectorizableTree[NodeIdx]->UserTreeIndices, - [](const EdgeInfo &EI) { + [&](const EdgeInfo &EI) { return EI.UserTE->getOpcode() == Instruction::ICmp && - any_of(EI.UserTE->Scalars, [](Value *V) { + any_of(EI.UserTE->Scalars, [&](Value *V) { auto *IC = dyn_cast(V); - return IC && IC->isSigned(); + return IC && + (IC->isSigned() || + !isKnownNonNegative(IC->getOperand(0), + SimplifyQuery(*DL)) || + !isKnownNonNegative(IC->getOperand(1), + SimplifyQuery(*DL))); }); }); } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll index 
932129bbb957fa..5312c36e436a21 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll @@ -6,10 +6,9 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @forward_dep_known_safe_due_to_backedge_taken_count(ptr %A) { ; CHECK-LABEL: 'forward_dep_known_safe_due_to_backedge_taken_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: @@ -44,10 +43,9 @@ exit: define void @forward_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) { ; CHECK-LABEL: 'forward_dep_not_known_safe_due_to_backedge_taken_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll index 51755314896bb3..5f4c732dc19df0 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll @@ -8,10 +8,9 @@ declare void @llvm.assume(i1) define void @different_non_constant_strides_known_forward(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_forward' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: @@ -45,10 +44,9 @@ exit: define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. 
+; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: diff --git a/llvm/test/CodeGen/AArch64/concatbinop.ll b/llvm/test/CodeGen/AArch64/concatbinop.ll index a13e62e0612cc0..828182d18b38ce 100644 --- a/llvm/test/CodeGen/AArch64/concatbinop.ll +++ b/llvm/test/CodeGen/AArch64/concatbinop.ll @@ -5,9 +5,13 @@ define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_add: ; CHECK: // %bb.0: -; CHECK-NEXT: add v2.4h, v2.4h, v3.4h -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = add <4 x i16> %a, %b %y = add <4 x i16> %c, %d @@ -33,13 +37,9 @@ define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: concat_addtunc2: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn v3.4h, v3.4s -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: add v1.4h, v2.4h, v3.4h -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v1.8h, v1.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %at = trunc <4 x i32> %a to <4 x i16> %bt = trunc <4 x i32> %b to <4 x i16> @@ -54,9 +54,13 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_sub: ; 
CHECK: // %bb.0: -; CHECK-NEXT: sub v2.4h, v2.4h, v3.4h -; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = sub <4 x i16> %a, %b %y = sub <4 x i16> %c, %d @@ -67,9 +71,13 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v2.4h, v2.4h, v3.4h -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = mul <4 x i16> %a, %b %y = mul <4 x i16> %c, %d @@ -80,9 +88,13 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: concat_xor: ; CHECK: // %bb.0: -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %x = xor <4 x i16> %a, %b %y = xor <4 x i16> %c, %d @@ -93,9 +105,13 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> define <8 x half> 
@concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: concat_fadd: ; CHECK: // %bb.0: -; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h -; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = fadd <4 x half> %a, %b %y = fadd <4 x half> %c, %d @@ -106,9 +122,13 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: concat_fmul: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul v2.4h, v2.4h, v3.4h -; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = fmul <4 x half> %a, %b %y = fmul <4 x half> %c, %d @@ -119,9 +139,13 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) { ; CHECK-LABEL: concat_min: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm v2.4h, v2.4h, v3.4h -; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v1.d[1], v3.d[0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %x = call <4 x half> 
@llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b) %y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d) @@ -146,21 +170,16 @@ define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture n ; CHECK-LABEL: signOf_neon: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: movi v0.8b, #1 +; CHECK-NEXT: movi v0.16b, #1 ; CHECK-NEXT: ldp q3, q4, [x1] ; CHECK-NEXT: cmhi v5.8h, v1.8h, v3.8h ; CHECK-NEXT: cmhi v6.8h, v2.8h, v4.8h ; CHECK-NEXT: cmhi v1.8h, v3.8h, v1.8h ; CHECK-NEXT: cmhi v2.8h, v4.8h, v2.8h -; CHECK-NEXT: xtn v3.8b, v5.8h -; CHECK-NEXT: xtn v4.8b, v6.8h -; CHECK-NEXT: xtn v1.8b, v1.8h -; CHECK-NEXT: xtn v2.8b, v2.8h -; CHECK-NEXT: and v3.8b, v3.8b, v0.8b -; CHECK-NEXT: and v4.8b, v4.8b, v0.8b -; CHECK-NEXT: orr v0.8b, v3.8b, v1.8b -; CHECK-NEXT: orr v1.8b, v4.8b, v2.8b -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v3.16b, v5.16b, v6.16b +; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b +; CHECK-NEXT: and v0.16b, v3.16b, v0.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %0 = load <8 x i16>, ptr %a, align 2 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index 3254c5ebe9c6b1..ab7cea8dfb7789 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -2825,10 +2825,11 @@ entry: define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { ; CHECK-SD-LABEL: add_pair_v2i16_v2i64_zext: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: uaddlv d0, v0.4s ; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ret @@ -3578,10 
+3579,11 @@ entry: define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) { ; CHECK-SD-LABEL: add_pair_v2i8_v2i64_zext: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff -; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: movi v2.2d, #0x0000ff000000ff ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: uaddlv d0, v0.4s ; CHECK-SD-NEXT: fmov x0, d0 ; CHECK-SD-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll index bfd18f1b52a51b..a3b6c283512f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll @@ -152,3 +152,57 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4 ret ptr addrspace(3) %load } + +; GCN-LABEL: {{^}}atomic_load_monotonic_f16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) { + %load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2 + %ret = bitcast half %load to i16 + ret i16 %ret +} + +; GCN-LABEL: {{^}}atomic_load_monotonic_f16_offset: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16 + %load = load atomic half, ptr addrspace(3) %gep monotonic, align 2 + %ret = bitcast half %load to i16 + ret i16 %ret +} + +; GCN-LABEL: 
{{^}}atomic_load_monotonic_bf16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) { + %load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2 + %ret = bitcast bfloat %load to i16 + ret i16 %ret +} + +; GCN-LABEL: {{^}}atomic_load_monotonic_bf16_offset: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16 + %load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2 + %ret = bitcast bfloat %load to i16 + ret i16 %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll index 71e24c1692c7f4..cd1e1fb1add473 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll @@ -101,3 +101,56 @@ define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val) ret void } +; GCN-LABEL: {{^}}atomic_store_monotonic_f16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = bitcast i16 %arg.val to half + store atomic half %val, ptr addrspace(3) %ptr monotonic, align 2 + ret void +} + +; GCN-LABEL: {{^}}atomic_store_monotonic_offset_f16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = 
bitcast i16 %arg.val to half + %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16 + store atomic half %val, ptr addrspace(3) %gep monotonic, align 2 + ret void +} + +; GCN-LABEL: {{^}}atomic_store_monotonic_bf16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = bitcast i16 %arg.val to bfloat + store atomic bfloat %val, ptr addrspace(3) %ptr monotonic, align 2 + ret void +} + +; GCN-LABEL: {{^}}atomic_store_monotonic_offset_bf16: +; GCN: s_waitcnt +; GFX9-NOT: s_mov_b32 m0 +; CI-NEXT: s_mov_b32 m0 +; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}} +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 +define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.val) { + %val = bitcast i16 %arg.val to bfloat + %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16 + store atomic bfloat %val, ptr addrspace(3) %gep monotonic, align 2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll index 06ba60518adc04..e44572985e6d2e 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll @@ -6741,6 +6741,81 @@ entry: ret void } +define amdgpu_kernel void @atomic_store_bf16_offset(bfloat %in, ptr %out) { +; GCN1-LABEL: atomic_store_bf16_offset: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GCN1-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: v_mov_b32_e32 v2, s0 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_store_bf16_offset: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN2-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN2-NEXT: 
s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: v_mov_b32_e32 v2, s0 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_store_bf16_offset: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN3-NEXT: s_load_dword s4, s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: v_mov_b32_e32 v2, s4 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr %out, i64 8 + store atomic bfloat %in, ptr %out seq_cst, align 2 + ret void +} + +define amdgpu_kernel void @atomic_store_bf16(bfloat %in, ptr %out) { +; GCN1-LABEL: atomic_store_bf16: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GCN1-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: v_mov_b32_e32 v2, s0 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_store_bf16: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN2-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: v_mov_b32_e32 v2, s0 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_store_bf16: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GCN3-NEXT: s_load_dword s4, s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: v_mov_b32_e32 v2, s4 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + store atomic bfloat %in, ptr %out seq_cst, align 2 + ret void +} + define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) { ; GCN1-LABEL: atomic_inc_i32_offset: ; GCN1: ; %bb.0: ; %entry @@ 
-7868,3 +7943,201 @@ entry: store i32 %val, ptr %out2 ret void } + +define amdgpu_kernel void @atomic_load_f16_offset(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_f16_offset: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: s_add_u32 s0, s0, 16 +; GCN1-NEXT: s_addc_u32 s1, s1, 0 +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_f16_offset: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: s_add_u32 s0, s0, 16 +; GCN2-NEXT: s_addc_u32 s1, s1, 0 +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_f16_offset: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] offset:16 glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %gep = getelementptr half, ptr %in, i64 8 + %val = load atomic half, ptr %gep seq_cst, align 2 + store half %val, ptr %out + ret void +} + +define amdgpu_kernel void @atomic_load_f16(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_f16: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], 
s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_f16: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_f16: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %val = load atomic half, ptr %in seq_cst, align 2 + store half %val, ptr %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16_offset(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_bf16_offset: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: s_add_u32 s0, s0, 16 +; GCN1-NEXT: s_addc_u32 s1, s1, 0 +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; 
GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_bf16_offset: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: s_add_u32 s0, s0, 16 +; GCN2-NEXT: s_addc_u32 s1, s1, 0 +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_bf16_offset: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] offset:16 glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr %in, i64 8 + %val = load atomic bfloat, ptr %gep seq_cst, align 2 + store bfloat %val, ptr %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16(ptr %in, ptr %out) { +; GCN1-LABEL: atomic_load_bf16: +; GCN1: ; %bb.0: +; GCN1-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN1-NEXT: s_waitcnt lgkmcnt(0) +; GCN1-NEXT: v_mov_b32_e32 v0, s0 +; GCN1-NEXT: v_mov_b32_e32 v1, s1 +; GCN1-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN1-NEXT: buffer_wbinvl1_vol +; GCN1-NEXT: v_mov_b32_e32 v0, s2 +; GCN1-NEXT: v_mov_b32_e32 v1, s3 +; GCN1-NEXT: flat_store_short v[0:1], v2 +; GCN1-NEXT: s_endpgm +; +; GCN2-LABEL: atomic_load_bf16: +; GCN2: ; %bb.0: +; GCN2-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN2-NEXT: s_waitcnt lgkmcnt(0) +; GCN2-NEXT: v_mov_b32_e32 v0, s0 +; GCN2-NEXT: v_mov_b32_e32 v1, s1 +; GCN2-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN2-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN2-NEXT: buffer_wbinvl1_vol +; GCN2-NEXT: v_mov_b32_e32 v0, s2 +; GCN2-NEXT: v_mov_b32_e32 v1, s3 +; GCN2-NEXT: flat_store_short v[0:1], v2 +; GCN2-NEXT: s_endpgm +; +; GCN3-LABEL: atomic_load_bf16: +; GCN3: ; %bb.0: +; GCN3-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN3-NEXT: s_waitcnt lgkmcnt(0) +; GCN3-NEXT: v_mov_b32_e32 v0, s0 +; GCN3-NEXT: v_mov_b32_e32 v1, s1 +; GCN3-NEXT: flat_load_ushort v2, v[0:1] glc +; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN3-NEXT: buffer_wbinvl1_vol +; GCN3-NEXT: v_mov_b32_e32 v0, s2 +; GCN3-NEXT: v_mov_b32_e32 v1, s3 +; GCN3-NEXT: flat_store_short v[0:1], v2 +; GCN3-NEXT: s_endpgm + %val = load atomic bfloat, ptr %in seq_cst, align 2 + store bfloat %val, ptr %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 674d7a3c5c9b44..dac3a3db7b450b 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -6216,6 +6216,81 @@ entry: ret void } +define amdgpu_kernel void @atomic_store_bf16_offset(bfloat %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_store_bf16_offset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dword s4, s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:16 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_store_bf16_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; VI-NEXT: s_load_dword s4, s[0:1], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s2, 16 +; VI-NEXT: s_addc_u32 s1, s3, 0 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_store_bf16_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 
0x24 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: global_store_short v0, v1, s[2:3] offset:16 +; GFX9-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr addrspace(1) %out, i64 8 + store atomic bfloat %in, ptr addrspace(1) %gep seq_cst, align 2 + ret void +} + +define amdgpu_kernel void @atomic_store_bf16(bfloat %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_store_bf16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dword s4, s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_store_bf16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; VI-NEXT: s_load_dword s0, s[0:1], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s2 +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_store_bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + store atomic bfloat %in, ptr addrspace(1) %out seq_cst, align 2 + ret void +} + define amdgpu_kernel void @atomic_inc_i32_offset(ptr addrspace(1) %out, i32 %in) { ; SI-LABEL: atomic_inc_i32_offset: ; SI: ; %bb.0: ; %entry @@ -6963,3 +7038,207 @@ entry: store i32 %val, ptr addrspace(1) %out2 ret void } + +define amdgpu_kernel void @atomic_load_f16_offset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_f16_offset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; 
SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_wbinvl1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_f16_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, s6 +; VI-NEXT: s_mov_b32 s3, s7 +; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_f16_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:16 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr half, ptr addrspace(1) %in, i64 8 + %val = load atomic half, ptr addrspace(1) %gep seq_cst, align 2 + store half %val, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @atomic_load_f16_negoffset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_f16_negoffset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 +; SI-NEXT: v_mov_b32_e32 v1, -1 +; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; 
SI-NEXT: buffer_wbinvl1 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_f16_negoffset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s0, 0xfffffe00 +; VI-NEXT: s_addc_u32 s1, s1, -1 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: flat_load_ushort v0, v[0:1] glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_f16_negoffset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:-512 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr half, ptr addrspace(1) %in, i64 -256 + %val = load atomic half, ptr addrspace(1) %gep seq_cst, align 2 + store half %val, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16_offset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_bf16_offset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_wbinvl1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_bf16_offset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 
s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, s6 +; VI-NEXT: s_mov_b32 s3, s7 +; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:16 glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_bf16_offset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:16 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr addrspace(1) %in, i64 8 + %val = load atomic bfloat, ptr addrspace(1) %gep seq_cst, align 2 + store bfloat %val, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @atomic_load_bf16_negoffset(ptr addrspace(1) %in, ptr addrspace(1) %out) { +; SI-LABEL: atomic_load_bf16_negoffset: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s2 +; SI-NEXT: s_mov_b32 s5, s3 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_mov_b32_e32 v0, 0xfffffe00 +; SI-NEXT: v_mov_b32_e32 v1, -1 +; SI-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 glc +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: buffer_wbinvl1 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: atomic_load_bf16_negoffset: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_u32 s0, s0, 0xfffffe00 +; VI-NEXT: s_addc_u32 s1, s1, -1 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: 
flat_load_ushort v0, v[0:1] glc +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: buffer_wbinvl1_vol +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: atomic_load_bf16_negoffset: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v1, v0, s[0:1] offset:-512 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_wbinvl1_vol +; GFX9-NEXT: global_store_short v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm + %gep = getelementptr bfloat, ptr addrspace(1) %in, i64 -256 + %val = load atomic bfloat, ptr addrspace(1) %gep seq_cst, align 2 + store bfloat %val, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll index 14bcc4f994f890..400298bcff4f97 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s ; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok. 
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/atomic-load-store.ll index 2d1fc21cda89b0..1586a133568b35 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-store.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-store.ll @@ -1,26 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s -; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s -; 
RUN: llc -mtriple=riscv64 -mattr=+a,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso,+seq-cst-trailing-fence -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 141d5ea4182892..61b5e50c6d52f2 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -129,7 +129,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefixes=CHECK,RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64ZMMUL %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+zmmul %s -o - | FileCheck --check-prefixes=CHECK,RV64MZMMUL %s -; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefixes=CHECK,RV64A %s +; RUN: llc -mtriple=riscv64 -mattr=+a,no-trailing-seq-cst-fence %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6C %s +; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefixes=CHECK,RV64A,A6S %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefixes=CHECK,RV64F %s ; RUN: llc -mtriple=riscv64 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV64D %s ; RUN: llc -mtriple=riscv64 -mattr=+c %s -o - | FileCheck --check-prefixes=CHECK,RV64C %s @@ -516,3 +517,10 @@ define i32 @addi(i32 %a) { %1 = add i32 %a, 1 ret i32 %1 } + +define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { + %1 = load atomic i8, ptr %a seq_cst, align 1 + ret i8 %1 +; A6S: .attribute 14, 2 +; A6C: .attribute 14, 1 +} diff --git a/llvm/test/CodeGen/RISCV/fixups-diff.ll b/llvm/test/CodeGen/RISCV/fixups-diff.ll index 84a7d18ed15068..cc1c87b1fe377f 100644 --- 
a/llvm/test/CodeGen/RISCV/fixups-diff.ll +++ b/llvm/test/CodeGen/RISCV/fixups-diff.ll @@ -27,7 +27,7 @@ entry: ; CHECK: } ; CHECK: Section {{.*}} .rela.eh_frame { -; CHECK-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 +; CHECK-NEXT: 0x1C R_RISCV_32_PCREL 0x0 ; CHECK-NEXT: } !llvm.dbg.cu = !{!0} diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index c303690aadfff8..44db3c49db8c3b 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC +; RUN: llc -mtriple=riscv32 -mattr=+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC ; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32,RV32-NO-ATOMIC -; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC -; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics,+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC +; RUN: llc -mtriple=riscv32 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV32,RV32-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv64 -mattr=+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+seq-cst-trailing-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-NO-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC -; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+seq-cst-trailing-fence < %s | FileCheck %s 
--check-prefixes=RV64,RV64-ATOMIC-TRAILING +; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics,+no-trailing-seq-cst-fence < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC +; RUN: llc -mtriple=riscv64 -mattr=+forced-atomics < %s | FileCheck %s --check-prefixes=RV64,RV64-ATOMIC-TRAILING define i8 @load8(ptr %p) nounwind { ; RV32-NO-ATOMIC-LABEL: load8: diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 0745b59c06cc8d..817e2b7d0bd993 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -4,7 +4,9 @@ ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBANOZBB ; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB +; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBNOZBS +; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbs -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=CHECK,RV64ZBA,RV64ZBAZBB,RV64ZBAZBBZBS define i64 @slliuw(i64 %a) nounwind { ; RV64I-LABEL: slliuw: @@ -2733,3 +2735,121 @@ define i64 @mul_neg8(i64 %a) { %c = mul i64 %a, -8 ret i64 %c } + +define i64 @bext_mul12(i32 %1, i32 %2) { +; RV64I-LABEL: bext_mul12: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: li a1, 12 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBANOZBB-LABEL: bext_mul12: +; RV64ZBANOZBB: # %bb.0: # %entry +; RV64ZBANOZBB-NEXT: srlw a0, a0, a1 +; RV64ZBANOZBB-NEXT: andi a0, a0, 1 +; RV64ZBANOZBB-NEXT: sh1add a0, a0, a0 +; RV64ZBANOZBB-NEXT: slli a0, a0, 2 +; RV64ZBANOZBB-NEXT: ret +; +; RV64ZBAZBBNOZBS-LABEL: bext_mul12: +; RV64ZBAZBBNOZBS: # %bb.0: # %entry +; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1 +; RV64ZBAZBBNOZBS-NEXT: sh1add a0, a0, a0 +; RV64ZBAZBBNOZBS-NEXT: slli a0, a0, 2 +; 
RV64ZBAZBBNOZBS-NEXT: ret +; +; RV64ZBAZBBZBS-LABEL: bext_mul12: +; RV64ZBAZBBZBS: # %bb.0: # %entry +; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: sh1add a0, a0, a0 +; RV64ZBAZBBZBS-NEXT: slli a0, a0, 2 +; RV64ZBAZBBZBS-NEXT: ret +entry: + %3 = lshr i32 %1, %2 + %4 = and i32 %3, 1 + %5 = zext nneg i32 %4 to i64 + %6 = mul i64 %5, 12 + ret i64 %6 +} + +define i64 @bext_mul45(i32 %1, i32 %2) { +; RV64I-LABEL: bext_mul45: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: li a1, 45 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBANOZBB-LABEL: bext_mul45: +; RV64ZBANOZBB: # %bb.0: # %entry +; RV64ZBANOZBB-NEXT: srlw a0, a0, a1 +; RV64ZBANOZBB-NEXT: andi a0, a0, 1 +; RV64ZBANOZBB-NEXT: sh2add a0, a0, a0 +; RV64ZBANOZBB-NEXT: sh3add a0, a0, a0 +; RV64ZBANOZBB-NEXT: ret +; +; RV64ZBAZBBNOZBS-LABEL: bext_mul45: +; RV64ZBAZBBNOZBS: # %bb.0: # %entry +; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1 +; RV64ZBAZBBNOZBS-NEXT: sh2add a0, a0, a0 +; RV64ZBAZBBNOZBS-NEXT: sh3add a0, a0, a0 +; RV64ZBAZBBNOZBS-NEXT: ret +; +; RV64ZBAZBBZBS-LABEL: bext_mul45: +; RV64ZBAZBBZBS: # %bb.0: # %entry +; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a0 +; RV64ZBAZBBZBS-NEXT: sh3add a0, a0, a0 +; RV64ZBAZBBZBS-NEXT: ret +entry: + %3 = lshr i32 %1, %2 + %4 = and i32 %3, 1 + %5 = zext nneg i32 %4 to i64 + %6 = mul i64 %5, 45 + ret i64 %6 +} + +define i64 @bext_mul132(i32 %1, i32 %2) { +; RV64I-LABEL: bext_mul132: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: li a1, 132 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBANOZBB-LABEL: bext_mul132: +; RV64ZBANOZBB: # %bb.0: # %entry +; RV64ZBANOZBB-NEXT: srlw a0, a0, a1 +; RV64ZBANOZBB-NEXT: andi a0, a0, 1 +; RV64ZBANOZBB-NEXT: slli a1, a0, 7 +; RV64ZBANOZBB-NEXT: sh2add a0, a0, a1 +; RV64ZBANOZBB-NEXT: ret +; +; RV64ZBAZBBNOZBS-LABEL: 
bext_mul132: +; RV64ZBAZBBNOZBS: # %bb.0: # %entry +; RV64ZBAZBBNOZBS-NEXT: srlw a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: andi a0, a0, 1 +; RV64ZBAZBBNOZBS-NEXT: slli a1, a0, 7 +; RV64ZBAZBBNOZBS-NEXT: sh2add a0, a0, a1 +; RV64ZBAZBBNOZBS-NEXT: ret +; +; RV64ZBAZBBZBS-LABEL: bext_mul132: +; RV64ZBAZBBZBS: # %bb.0: # %entry +; RV64ZBAZBBZBS-NEXT: bext a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: slli a1, a0, 7 +; RV64ZBAZBBZBS-NEXT: sh2add a0, a0, a1 +; RV64ZBAZBBZBS-NEXT: ret +entry: + %3 = lshr i32 %1, %2 + %4 = and i32 %3, 1 + %5 = zext nneg i32 %4 to i64 + %6 = mul i64 %5, 132 + ret i64 %6 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll new file mode 100644 index 00000000000000..6435c1c14e061e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -0,0 +1,253 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr='+v' -O3 %s -o - | FileCheck %s + +declare @llvm.riscv.vadd.nxv1i8.nxv1i8( + , + , + , + i32) + +declare @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + , + , + , + , + i32, i32) + +declare @llvm.riscv.vsub.nxv1i8.nxv1i8( + , + , + , + i32) + +declare @llvm.riscv.vmul.nxv1i8.nxv1i8( + , + , + , + i32) + +define @simple_vadd_vv( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: simple_vadd_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vadd.vv v9, v8, v9 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %b, + i32 %2) + + ret %c +} + +define @simple_vadd_vsub_vv( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: simple_vadd_vsub_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 
+; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vsub.nxv1i8.nxv1i8( + undef, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + undef, + %0, + %b, + i32 %2) + + ret %c +} + +define @simple_vmul_vv( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: simple_vmul_vv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v8, v8, v8 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + undef, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + undef, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vmul.nxv1i8.nxv1i8( + undef, + %0, + %b, + i32 %2) + + ret %c +} + +; With passthru and masks. +define @vadd_vv_passthru( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: vadd_vv_passthru: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vadd.vv v9, v8, v8 +; CHECK-NEXT: vadd.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %b, + i32 %2) + + ret %c +} + +define @vadd_vv_passthru_negative( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: vadd_vv_passthru_negative: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9 +; CHECK-NEXT: vadd.vv v9, v8, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %1, + i32 %2) + + %b = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %1, 
+ %0, + %a, + i32 %2) + + %c = call @llvm.riscv.vadd.nxv1i8.nxv1i8( + %0, + %0, + %b, + i32 %2) + + ret %c +} + +define @vadd_vv_mask( %0, %1, i32 %2, %m) nounwind { +; CHECK-LABEL: vadd_vv_mask: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vadd.vv v9, v8, v8, v0.t +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %1, + %m, + i32 %2, i32 1) + + %b = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %a, + %m, + i32 %2, i32 1) + + %c = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %b, + %m, + i32 %2, i32 1) + + ret %c +} + +define @vadd_vv_mask_negative( %0, %1, i32 %2, %m, %m2) nounwind { +; CHECK-LABEL: vadd_vv_mask_negative: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: vadd.vv v11, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v9, v8 +; CHECK-NEXT: vadd.vv v9, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %1, + %m, + i32 %2, i32 1) + + %b = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %a, + %m, + i32 %2, i32 1) + + %c = call @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( + %0, + %0, + %b, + %m2, + i32 %2, i32 1) + + ret %c +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 621445fb2dc5e4..4ff2fc7a5fff5d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -487,6 +487,54 @@ for.end: ; preds = %for.body, %entry ret void } +define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) { +; CHECK-LABEL: saxpy_vec_demanded_fields: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma +; CHECK-NEXT: beqz a3, .LBB9_2 +; CHECK-NEXT: .LBB9_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vse32.v v16, (a2) +; CHECK-NEXT: sub a0, a0, a3 +; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: bnez a3, .LBB9_1 +; CHECK-NEXT: .LBB9_2: # %for.end +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3) + %cmp.not13 = icmp eq i64 %0, 0 + br i1 %cmp.not13, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %1 = phi i64 [ %7, %for.body ], [ %0, %entry ] + %n.addr.016 = phi i64 [ %sub, %for.body ], [ %n, %entry ] + %x.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] + %2 = bitcast ptr %x.addr.015 to ptr + %3 = tail call @llvm.riscv.vle.nxv16f32.i64( undef, ptr %2, i64 %1) + %add.ptr = getelementptr inbounds float, ptr %x.addr.015, i64 %1 + %4 = bitcast ptr %y.addr.014 to ptr + %5 = tail call @llvm.riscv.vle.nxv16f32.i64( undef, ptr %4, i64 %1) + %6 = tail call @llvm.riscv.vfmacc.nxv16f32.f32.i64( %5, float %a, %3, i64 7, i64 %1, i64 0) + tail call void @llvm.riscv.vse.nxv16f32.i64( %6, ptr %4, i64 %1) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.014, i64 %1 + %sub = sub i64 %n.addr.016, %1 + %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub, i64 1, i64 2) + %cmp.not = icmp eq i64 %7, 0 + br i1 %cmp.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) declare @llvm.riscv.vle.nxv16f32.i64(, ptr nocapture, i64) 
declare @llvm.riscv.vfmacc.nxv16f32.f32.i64(, float, , i64, i64, i64) @@ -501,12 +549,12 @@ define @test_vsetvli_x0_x0(ptr %x, ptr %y, ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: andi a3, a3, 1 -; CHECK-NEXT: beqz a3, .LBB9_2 +; CHECK-NEXT: beqz a3, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vle16.v v10, (a1) ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwcvt.x.x.v v8, v10 -; CHECK-NEXT: .LBB9_2: # %if.end +; CHECK-NEXT: .LBB10_2: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: ret @@ -540,19 +588,19 @@ define @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: andi a4, a4, 1 -; CHECK-NEXT: beqz a4, .LBB10_2 +; CHECK-NEXT: beqz a4, .LBB11_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vle16.v v10, (a1) ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwadd.wv v9, v9, v10 -; CHECK-NEXT: .LBB10_2: # %if.end +; CHECK-NEXT: .LBB11_2: # %if.end ; CHECK-NEXT: andi a5, a5, 1 -; CHECK-NEXT: beqz a5, .LBB10_4 +; CHECK-NEXT: beqz a5, .LBB11_4 ; CHECK-NEXT: # %bb.3: # %if2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: vwadd.wv v9, v9, v10 -; CHECK-NEXT: .LBB10_4: # %if2.end +; CHECK-NEXT: .LBB11_4: # %if2.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v9, v8 ; CHECK-NEXT: ret @@ -586,11 +634,11 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) { ; CHECK-LABEL: vlmax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; CHECK-NEXT: blez a0, .LBB11_3 +; CHECK-NEXT: blez a0, .LBB12_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: slli a4, a6, 3 -; CHECK-NEXT: .LBB11_2: # %for.body +; CHECK-NEXT: .LBB12_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle64.v v8, 
(a2) ; CHECK-NEXT: vle64.v v9, (a3) @@ -600,8 +648,8 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) { ; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: add a3, a3, a4 ; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: blt a5, a0, .LBB11_2 -; CHECK-NEXT: .LBB11_3: # %for.end +; CHECK-NEXT: blt a5, a0, .LBB12_2 +; CHECK-NEXT: .LBB12_3: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0) @@ -633,18 +681,18 @@ define void @vector_init_vlmax(i64 %N, ptr %c) { ; CHECK-LABEL: vector_init_vlmax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; CHECK-NEXT: blez a0, .LBB12_3 +; CHECK-NEXT: blez a0, .LBB13_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB12_2: # %for.body +; CHECK-NEXT: .LBB13_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: blt a3, a0, .LBB12_2 -; CHECK-NEXT: .LBB12_3: # %for.end +; CHECK-NEXT: blt a3, a0, .LBB13_2 +; CHECK-NEXT: .LBB13_3: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvlimax.i64(i64 3, i64 0) @@ -669,20 +717,20 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) { ; CHECK-LABEL: vector_init_vsetvli_N: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli a2, a0, e64, m1, ta, ma -; CHECK-NEXT: blez a0, .LBB13_3 +; CHECK-NEXT: blez a0, .LBB14_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB13_2: # %for.body +; CHECK-NEXT: .LBB14_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: blt a3, a0, .LBB13_2 -; CHECK-NEXT: .LBB13_3: # %for.end +; 
CHECK-NEXT: blt a3, a0, .LBB14_2 +; CHECK-NEXT: .LBB14_3: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvli(i64 %N, i64 3, i64 0) @@ -711,13 +759,13 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) { ; CHECK-NEXT: slli a4, a3, 3 ; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB14_1: # %for.body +; CHECK-NEXT: .LBB15_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a1, a1, a4 -; CHECK-NEXT: blt a2, a0, .LBB14_1 +; CHECK-NEXT: blt a2, a0, .LBB15_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret entry: @@ -745,13 +793,13 @@ define void @vector_init_vsetvli_fv2(i64 %N, ptr %c) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB15_1: # %for.body +; CHECK-NEXT: .LBB16_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a1, a1, 32 -; CHECK-NEXT: blt a2, a0, .LBB15_1 +; CHECK-NEXT: blt a2, a0, .LBB16_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret entry: @@ -779,13 +827,13 @@ define void @vector_init_vsetvli_fv3(i64 %N, ptr %c) { ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: .LBB16_1: # %for.body +; CHECK-NEXT: .LBB17_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a2, a2, 4 ; CHECK-NEXT: addi a1, a1, 32 -; CHECK-NEXT: blt a2, a0, .LBB16_1 +; CHECK-NEXT: blt a2, a0, .LBB17_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret entry: @@ -861,10 +909,10 @@ define @compat_store_consistency(i1 %cond, %a, %b @@ -886,16 +934,16 @@ define @test_ratio_only_vmv_s_x(ptr %x, 
ptr %y, i1 %cond) nou ; CHECK-LABEL: test_ratio_only_vmv_s_x: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andi a2, a2, 1 -; CHECK-NEXT: beqz a2, .LBB20_2 +; CHECK-NEXT: beqz a2, .LBB21_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vwcvt.x.x.v v8, v9 -; CHECK-NEXT: j .LBB20_3 -; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: j .LBB21_3 +; CHECK-NEXT: .LBB21_2: ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: .LBB20_3: # %if.end +; CHECK-NEXT: .LBB21_3: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: ret @@ -918,16 +966,16 @@ define @test_ratio_only_vmv_s_x2(ptr %x, ptr %y, i1 %cond) no ; CHECK-LABEL: test_ratio_only_vmv_s_x2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andi a2, a2, 1 -; CHECK-NEXT: beqz a2, .LBB21_2 +; CHECK-NEXT: beqz a2, .LBB22_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: j .LBB21_3 -; CHECK-NEXT: .LBB21_2: +; CHECK-NEXT: j .LBB22_3 +; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vwcvt.x.x.v v8, v9 -; CHECK-NEXT: .LBB21_3: # %if.end +; CHECK-NEXT: .LBB22_3: # %if.end ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, zero ; CHECK-NEXT: ret @@ -953,13 +1001,13 @@ define void @pre_over_vle(ptr %A) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi a1, a0, 800 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: .LBB22_1: # %vector.body +; CHECK-NEXT: .LBB23_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf4 v9, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: addi a0, a0, 8 -; CHECK-NEXT: bne a0, a1, .LBB22_1 +; CHECK-NEXT: bne a0, a1, .LBB23_1 ; CHECK-NEXT: # %bb.2: # %exit ; CHECK-NEXT: ret entry: diff --git 
a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll index d28836d560377e..d6a1d8d6e1366f 100644 --- a/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll +++ b/llvm/test/DebugInfo/LoongArch/dwarf-loongarch-relocs.ll @@ -18,21 +18,21 @@ ; RELOCS-BOTH: Section ({{.*}}) .rela.debug_frame { ; RELOCS-NORL-NEXT: 0x1C R_LARCH_32 .debug_frame 0x0 ; RELOCS-NORL-NEXT: 0x20 R_LARCH_64 .text 0x0 -; RELOCS-ENRL-NEXT: 0x1C R_LARCH_32 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x20 R_LARCH_64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x28 R_LARCH_ADD64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x28 R_LARCH_SUB64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x3F R_LARCH_ADD6 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x3F R_LARCH_SUB6 .L0 0x0 +; RELOCS-ENRL-NEXT: 0x1C R_LARCH_32 0x0 +; RELOCS-ENRL-NEXT: 0x20 R_LARCH_64 0x0 +; RELOCS-ENRL-NEXT: 0x28 R_LARCH_ADD64 0x0 +; RELOCS-ENRL-NEXT: 0x28 R_LARCH_SUB64 0x0 +; RELOCS-ENRL-NEXT: 0x3F R_LARCH_ADD6 0x0 +; RELOCS-ENRL-NEXT: 0x3F R_LARCH_SUB6 0x0 ; RELOCS-BOTH-NEXT: } ; RELOCS-BOTH: Section ({{.*}}) .rela.debug_line { ; RELOCS-BOTH-NEXT: 0x22 R_LARCH_32 .debug_line_str 0x0 ; RELOCS-BOTH-NEXT: 0x31 R_LARCH_32 .debug_line_str 0x2 ; RELOCS-BOTH-NEXT: 0x46 R_LARCH_32 .debug_line_str 0x1B ; RELOCS-NORL-NEXT: 0x4F R_LARCH_64 .text 0x0 -; RELOCS-ENRL-NEXT: 0x4F R_LARCH_64 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x5F R_LARCH_ADD16 .L0 0x0 -; RELOCS-ENRL-NEXT: 0x5F R_LARCH_SUB16 .L0 0x0 +; RELOCS-ENRL-NEXT: 0x4F R_LARCH_64 0x0 +; RELOCS-ENRL-NEXT: 0x5F R_LARCH_ADD16 0x0 +; RELOCS-ENRL-NEXT: 0x5F R_LARCH_SUB16 0x0 ; RELOCS-BOTH-NEXT: } ; RELOCS-BOTH-NEXT: ] diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll index 99594b5e01e955..e5de1713f4e00d 100644 --- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll +++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll @@ -6,14 +6,14 @@ ; Check that we actually have relocations, otherwise this is kind of pointless. 
; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_info { -; READOBJ-RELOCS: 0x1B R_RISCV_ADD32 .L0 0x0 -; READOBJ-RELOCS-NEXT: 0x1B R_RISCV_SUB32 .L0 0x0 +; READOBJ-RELOCS: 0x1B R_RISCV_ADD32 0x0 +; READOBJ-RELOCS-NEXT: 0x1B R_RISCV_SUB32 0x0 ; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_frame { -; READOBJ-RELOCS: 0x20 R_RISCV_ADD32 .L0 0x0 -; READOBJ-RELOCS-NEXT: 0x20 R_RISCV_SUB32 .L0 0x0 +; READOBJ-RELOCS: 0x20 R_RISCV_ADD32 0x0 +; READOBJ-RELOCS-NEXT: 0x20 R_RISCV_SUB32 0x0 ; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_line { -; READOBJ-RELOCS: 0x5A R_RISCV_ADD16 .L0 0x0 -; READOBJ-RELOCS-NEXT: 0x5A R_RISCV_SUB16 .L0 0x0 +; READOBJ-RELOCS: 0x5A R_RISCV_ADD16 0x0 +; READOBJ-RELOCS-NEXT: 0x5A R_RISCV_SUB16 0x0 ; Check that we can print the source, even with relocations. ; OBJDUMP-SOURCE: Disassembly of section .text: diff --git a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll index ffef0ec2340684..f655a7c0a7ef42 100644 --- a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll +++ b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll @@ -4,11 +4,11 @@ ; RUN: | FileCheck -check-prefix=RELAX-DWARFDUMP %s ; ; RELAX: Section ({{.*}}) .rela.eh_frame { -; RELAX-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 -; RELAX-NEXT: 0x30 R_RISCV_32_PCREL .L0 0x0 -; RELAX-NEXT: 0x44 R_RISCV_32_PCREL .L0 0x0 -; RELAX-NEXT: 0x48 R_RISCV_ADD32 .L0 0x0 -; RELAX-NEXT: 0x48 R_RISCV_SUB32 .L0 0x0 +; RELAX-NEXT: 0x1C R_RISCV_32_PCREL 0x0 +; RELAX-NEXT: 0x30 R_RISCV_32_PCREL 0x0 +; RELAX-NEXT: 0x44 R_RISCV_32_PCREL 0x0 +; RELAX-NEXT: 0x48 R_RISCV_ADD32 0x0 +; RELAX-NEXT: 0x48 R_RISCV_SUB32 0x0 ; RELAX-NEXT: } ; RELAX-DWARFDUMP-NOT: error: failed to compute relocation diff --git a/llvm/test/DebugInfo/Symbolize/ELF/riscv-temporary-symbol.s b/llvm/test/DebugInfo/Symbolize/ELF/riscv-empty-name-symbol.s similarity index 71% rename from llvm/test/DebugInfo/Symbolize/ELF/riscv-temporary-symbol.s rename to llvm/test/DebugInfo/Symbolize/ELF/riscv-empty-name-symbol.s index 
0b54f104ab953e..1e0fa8a3061830 100644 --- a/llvm/test/DebugInfo/Symbolize/ELF/riscv-temporary-symbol.s +++ b/llvm/test/DebugInfo/Symbolize/ELF/riscv-empty-name-symbol.s @@ -1,11 +1,10 @@ # REQUIRES: riscv-registered-target -## Ignore .L0 symbols that are generated by LLVM integrated assembler and GNU -## assembler for .debug_line/.eh_frame related assembler directives. +## Ignore empty name symbols. # RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t # RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYM -# SYM: 0000000000000004 0 NOTYPE LOCAL DEFAULT [[#]] .L0 {{$}} +# SYM: 0000000000000004 0 NOTYPE LOCAL DEFAULT [[#]] {{$}} # SYM: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [[#]] foo ## Make sure we test at an address larger than or equal to an empty name symbol. diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s index e7114e4d643c6f..a5038022dfe0c3 100644 --- a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s @@ -7,7 +7,7 @@ # the section start and section end. So that by relocating these symbol, the section length # can be calculated. 
# -# CHECK: Creating defined graph symbol for ELF symbol ".L0 " +# CHECK: Creating defined graph symbol for ELF symbol "" # CHECK: Creating defined graph symbol for ELF symbol "main" .text .globl main diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll new file mode 100644 index 00000000000000..39b2b6225d8b10 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -0,0 +1,3617 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test1( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx 
@llvm.x86.ssse3.phadd.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test88( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test87( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to 
<4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test86( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to 
<8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test85( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), 
align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test84( +; CHECK-SAME: 
<1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, 
i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test83( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call 
x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test82( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 
[[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test81( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: 
[[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test80( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: 
[[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test79( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; 
CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test78( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to 
x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test77( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to 
<8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test76( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> 
+ %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test75( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i32> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <2 x i1> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i32> [[TMP13]] to 
x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test74( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to 
<4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to x86_mmx +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to x86_mmx +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[TMP14]], x86_mmx [[TMP15]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[_MSPROP_VECTOR_PACK]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx 
%mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone + +define i64 @test73(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test73( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret 
i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone + +define i64 @test72(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test72_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; 
CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 0) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone + +define i64 @test71(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test71( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 
x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx [[TMP6]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to i64 + ret i64 %2 +} + +declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone + +define i64 @test70(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: 
[[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test70_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = 
extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 0) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone + +define i64 @test69(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test69( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone + +define i64 @test68(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test68( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx [[TMP6]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to i64 + ret i64 %2 +} + +declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone + +define i64 @test67(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test67( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone + +define i64 @test66(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test66( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to 
<4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: [[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[TMP10]], i32 3) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test66_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test66_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP9]] to x86_mmx +; CHECK-NEXT: 
[[TMP1:%.*]] = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[TMP10]], i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast x86_mmx [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 0) nounwind + %2 = bitcast x86_mmx %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test65( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> 
[[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test64( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x 
i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + 
%4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test63( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx [[TMP6]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) 
nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test62( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> 
[[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test61( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx 
@llvm.x86.mmx.psrl.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test60( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 
[[_MSPROP1]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[_MSPROP]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx [[TMP6]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP3]], [[TMP10]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test59( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: 
[[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test58( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] 
= bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP12]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx [[TMP8]], x86_mmx [[MMX_VAR1_I]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP17]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1.i = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone 
+ +define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test56( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast 
x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test55( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + 
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test54( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test53( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: 
[[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test52( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: 
[[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test51( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: 
[[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test50( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test49( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP13:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> 
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP19]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP18]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test48( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx 
@llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test47( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx 
%mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test46( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to 
<4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test45( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: 
[[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test44( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx 
@llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone + +declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test43( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b 
to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test42( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; 
CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test41( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: 
[[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test40( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx 
[[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test39( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; 
CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test38( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; 
CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test37( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; 
CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test36( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add 
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test35( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: 
[[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test34( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test33( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; 
CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test32( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> 
[[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP4]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 48 +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test31( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test30(<1 x 
i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test30( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast 
<8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test29( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to 
x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test28( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test27( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] 
to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test26( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; 
CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind + +define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test25( +; CHECK-SAME: ptr [[P:%.*]], <1 x i64> [[A:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: 
tail call void @llvm.x86.mmx.movnt.dq(ptr [[P]], x86_mmx [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to x86_mmx + tail call void @llvm.x86.mmx.movnt.dq(ptr %p, x86_mmx %mmx_var.i) nounwind + ret void +} + +declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone + +define i32 @test24(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test24( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP6]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx + %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind + ret i32 %1 +} + +declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind + +define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test23( +; CHECK-SAME: <1 x i64> [[D:%.*]], <1 x i64> [[N:%.*]], ptr [[P:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: 
[[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[N]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[D]] to <8 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP5]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP3]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: tail call void @llvm.x86.mmx.maskmovq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]], ptr [[P]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %0 = bitcast <1 x i64> %n to <8 x i8> + %1 = bitcast <1 x i64> %d to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx + %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx + tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, ptr %p) nounwind + ret void +} + +declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test22( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind 
readnone + +define i64 @test21(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test21( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i32 @test21_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test21_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP5]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <2 x i32> + %5 = extractelement <2 x i32> %4, i32 0 + ret i32 %5 +} + +declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test20( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: 
[[TMP7:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx [[MMX_VAR_I]], x86_mmx [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx + %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone + +define <2 x double> @test19(<1 x i64> %a) #0 { +; CHECK-LABEL: define <2 x double> @test19( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP7]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x 
i32> %0 to x86_mmx + %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone + ret <2 x double> %2 +} + +declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone + +define i64 @test18(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test18( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP0:%.*]] = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast x86_mmx %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone + +define i64 @test17(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test17( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] 
+; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP0:%.*]] = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast x86_mmx %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone + +define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test16( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[_MSPROP1]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx [[MMX_VAR]], x86_mmx [[MMX_VAR1]], i8 16) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to i64 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to x86_mmx + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16) + %3 = bitcast x86_mmx %2 to i64 + ret i64 %3 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone + +define i64 @test15(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test15( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x i32> %0 to x86_mmx + %2 = tail 
call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone + +define i64 @test14(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test14( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone + +define i64 @test13(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test13( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, 
ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP11]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast x86_mmx [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone + %3 = bitcast x86_mmx %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test12( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test11( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x 
i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test10( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 
8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test9( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone + +define 
i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test8( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 
x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test7( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP18]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP21]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> 
[[TMP19]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP20]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to x86_mmx + %3 = bitcast <8 x i8> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test6( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = 
bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test5( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; 
CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test4( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: 
[[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test3( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x 
i16> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to x86_mmx + %3 = bitcast <4 x i16> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone + +define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test2( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; 
CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to x86_mmx +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to x86_mmx +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx [[TMP2]], x86_mmx [[TMP3]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast x86_mmx [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to x86_mmx + %3 = bitcast <2 x i32> %0 to x86_mmx + %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone + %5 = bitcast x86_mmx %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind #0 { +; ALL-LABEL: test89: +; ALL: # %bb.0: +; ALL-NEXT: cvtpi2ps %mm0, %xmm0 +; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define <4 x float> @test89( +; CHECK-SAME: <4 x float> [[A:%.*]], x86_mmx [[B:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: 
[[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[C:%.*]] = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> [[A]], x86_mmx [[B]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[C]] +; + %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) + ret <4 x float> %c +} + +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone + +define void @test90() #0 { +; ALL-LABEL: test90: +; ALL: # %bb.0: +; ALL-NEXT: emms +; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define void @test90( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.mmx.emms() +; CHECK-NEXT: ret void +; + call void @llvm.x86.mmx.emms() + ret void +} + +declare void @llvm.x86.mmx.emms() + +define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 { +; CHECK-LABEL: define <1 x i64> @test_mm_insert_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]], i32 [[D:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A_COERCE]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label 
[[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx [[TMP0]], i32 [[D]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_mmx [[TMP1]] to <1 x i64> +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP2]] +; +entry: + %0 = bitcast <1 x i64> %a.coerce to x86_mmx + %1 = tail call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %0, i32 %d, i32 2) + %2 = bitcast x86_mmx %1 to <1 x i64> + ret <1 x i64> %2 +} + +declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32 immarg) + +define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 { +; CHECK-LABEL: define i32 @test_mm_extract_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A_COERCE]] to x86_mmx +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx [[TMP0]], i32 2) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = bitcast <1 x i64> %a.coerce to x86_mmx + %1 = tail call i32 @llvm.x86.mmx.pextr.w(x86_mmx %0, i32 2) + ret i32 %1 +} + +declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32 immarg) + +attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vscale.ll b/llvm/test/Instrumentation/MemorySanitizer/vscale.ll new file mode 100644 index 00000000000000..b1c64188157077 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/vscale.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -S -msan-check-access-address=0 -passes="msan" 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test_load_store_i32(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_i32( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store [[_MSLD]], ptr [[TMP7]], align 16 +; CHECK-NEXT: store [[TMP1]], ptr [[B]], align 16 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + store %1, ptr %b + ret void +} + +define void @test_load_store_add_int(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_add_int( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 64 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 
[[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[B]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load , ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: store [[_MSLD1]], ptr [[TMP12]], align 64 +; CHECK-NEXT: store [[TMP5]], ptr [[B]], align 64 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + %2 = load , ptr %b + %3 = add %1, %2 + store %2, ptr %b + ret void +} + +define void @test_load_store_float(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_float( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store [[_MSLD]], ptr [[TMP7]], align 16 +; CHECK-NEXT: store [[TMP1]], ptr [[B]], align 16 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + store %1, ptr %b + ret void +} + +define void @test_load_store_add_float(ptr %a, ptr %b) sanitize_memory { +; CHECK-LABEL: define void @test_load_store_add_float( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[A]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[B]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: store [[_MSLD1]], ptr [[TMP12]], align 8 +; CHECK-NEXT: store [[TMP5]], ptr [[B]], align 8 +; CHECK-NEXT: ret void +; + %1 = load , ptr %a + %2 = load , ptr %b + %3 = fadd %1, %2 + store %2, ptr %b + ret void +} diff --git a/llvm/test/MC/ELF/RISCV/gen-dwarf.s b/llvm/test/MC/ELF/RISCV/gen-dwarf.s index 34d02f5da44f29..342ed1cc0e7ef9 100644 --- a/llvm/test/MC/ELF/RISCV/gen-dwarf.s +++ b/llvm/test/MC/ELF/RISCV/gen-dwarf.s @@ -40,28 +40,28 @@ # CHECK-NEXT: 0x00000020: [DW_RLE_end_of_list ] # RELOC: Section ([[#]]) .rela.eh_frame { -# RELOC-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 -# RELOC-NEXT: 0x20 R_RISCV_ADD32 .L0 0x0 -# RELOC-NEXT: 0x20 R_RISCV_SUB32 .L0 0x0 -# RELOC-NEXT: 0x25 R_RISCV_SET6 .L0 0x0 -# RELOC-NEXT: 0x25 R_RISCV_SUB6 .L0 0x0 -# RELOC-NEXT: 0x34 R_RISCV_32_PCREL .L0 0x0 +# RELOC-NEXT: 0x1C R_RISCV_32_PCREL 0x0 +# RELOC-NEXT: 0x20 R_RISCV_ADD32 0x0 +# RELOC-NEXT: 0x20 R_RISCV_SUB32 0x0 +# RELOC-NEXT: 0x25 R_RISCV_SET6 0x0 +# RELOC-NEXT: 0x25 R_RISCV_SUB6 0x0 +# RELOC-NEXT: 0x34 R_RISCV_32_PCREL 0x0 # RELOC-NEXT: } # RELOC: Section ([[#]]) .rela.debug_rnglists 
{ # RELOC-NEXT: 0xD R_RISCV_64 .text.foo 0x0 -# RELOC-NEXT: 0x15 R_RISCV_SET_ULEB128 .L0 0x0 +# RELOC-NEXT: 0x15 R_RISCV_SET_ULEB128 0x0 # RELOC-NEXT: 0x15 R_RISCV_SUB_ULEB128 .text.foo 0x0 # RELOC-NEXT: 0x17 R_RISCV_64 .text.bar 0x0 # RELOC-NEXT: } # RELOC: Section ([[#]]) .rela.debug_line { -# RELOC: R_RISCV_ADD16 .L0 0x0 -# RELOC-NEXT: R_RISCV_SUB16 .L0 0x0 -# RELOC-NEXT: R_RISCV_ADD16 .L0 0x0 -# RELOC-NEXT: R_RISCV_SUB16 .L0 0x0 -# RELOC-NEXT: R_RISCV_ADD16 .L0 0x0 -# RELOC-NEXT: R_RISCV_SUB16 .L0 0x0 +# RELOC: R_RISCV_ADD16 0x0 +# RELOC-NEXT: R_RISCV_SUB16 0x0 +# RELOC-NEXT: R_RISCV_ADD16 0x0 +# RELOC-NEXT: R_RISCV_SUB16 0x0 +# RELOC-NEXT: R_RISCV_ADD16 0x0 +# RELOC-NEXT: R_RISCV_SUB16 0x0 # RELOC: } # RELOC: Hex dump of section '.eh_frame': diff --git a/llvm/test/MC/RISCV/attribute.s b/llvm/test/MC/RISCV/attribute.s index 56f0cb1daf176f..75b9c65ed1cc2f 100644 --- a/llvm/test/MC/RISCV/attribute.s +++ b/llvm/test/MC/RISCV/attribute.s @@ -24,3 +24,6 @@ .attribute priv_spec_revision, 0 # CHECK: attribute 12, 0 + +.attribute atomic_abi, 0 +# CHECK: attribute 14, 0 diff --git a/llvm/test/MC/RISCV/cfi-advance.s b/llvm/test/MC/RISCV/cfi-advance.s index b99af38f553aa0..c4af390be757da 100644 --- a/llvm/test/MC/RISCV/cfi-advance.s +++ b/llvm/test/MC/RISCV/cfi-advance.s @@ -1,27 +1,13 @@ # RUN: llvm-mc -filetype=obj -triple riscv32 %s -o %t.o -# RUN: llvm-readelf -sr %t.o | FileCheck %s +# RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=CHECK %s # RUN: llvm-dwarfdump --debug-frame %t.o 2>&1 \ # RUN: | FileCheck -check-prefix=CHECK-DWARFDUMP %s - -# CHECK: Relocation section '.rela.text1' at offset {{.*}} contains 1 entries: -# CHECK-NEXT: Offset Info Type Sym. Value Symbol's Name + Addend -# CHECK-NEXT: 00000000 00000313 R_RISCV_CALL_PLT 00000004 .L0 + 0 -# CHECK-EMPTY: -# CHECK-NEXT: Relocation section '.rela.eh_frame' at offset {{.*}} contains 3 entries: -# CHECK: Offset Info Type Sym. 
Value Symbol's Name + Addend -# CHECK-NEXT: 0000001c 00000139 R_RISCV_32_PCREL 00000000 .L0 + 0 -# CHECK-NEXT: 00000035 00000b35 R_RISCV_SET6 00010178 .L0 + 0 -# CHECK-NEXT: 00000035 00000934 R_RISCV_SUB6 0001016e .L0 + 0 -# CHECK-EMPTY: -# CHECK: Symbol table '.symtab' contains 15 entries: -# CHECK-NEXT: Num: Value Size Type Bind Vis Ndx Name -# CHECK-NEXT: 0: 00000000 0 NOTYPE LOCAL DEFAULT UND -# CHECK-NEXT: 1: 00000000 0 NOTYPE LOCAL DEFAULT 2 .L0 {{$}} -# CHECK: 3: 00000004 0 NOTYPE LOCAL DEFAULT 2 .L0{{$}} -# CHECK: 9: 0001016e 0 NOTYPE LOCAL DEFAULT 2 .L0 {{$}} -# CHECK: 11: 00010178 0 NOTYPE LOCAL DEFAULT 2 .L0 {{$}} - +# CHECK: .rela.eh_frame { +# CHECK-NEXT: 0x1C R_RISCV_32_PCREL 0x0 +# CHECK-NEXT: 0x35 R_RISCV_SET6 0x0 +# CHECK-NEXT: 0x35 R_RISCV_SUB6 0x0 +# CHECK-NEXT: } # CHECK-DWARFDUMP: DW_CFA_advance_loc1: 104 # CHECK-DWARFDUMP-NEXT: DW_CFA_def_cfa_offset: +8 # CHECK-DWARFDUMP-NEXT: DW_CFA_advance_loc2: 259 @@ -37,9 +23,6 @@ test: .cfi_startproc nop -## This looks similar to fake label names ".L0 ". Even if this is ".L0 ", -## the assembler will not conflate it with fake labels. 
-.L0: .zero 100, 0x90 .cfi_def_cfa_offset 8 nop @@ -53,6 +36,3 @@ test: .cfi_def_cfa_offset 8 nop .cfi_endproc - -.section .text1,"ax" -call .L0 diff --git a/llvm/test/MC/RISCV/fde-reloc.s b/llvm/test/MC/RISCV/fde-reloc.s index 81ec426c8b6165..1db8929e074703 100644 --- a/llvm/test/MC/RISCV/fde-reloc.s +++ b/llvm/test/MC/RISCV/fde-reloc.s @@ -12,7 +12,7 @@ func: .cfi_endproc # CHECK: Section (4) .rela.eh_frame { -# CHECK-NEXT: 0x1C R_RISCV_32_PCREL .L0 0x0 +# CHECK-NEXT: 0x1C R_RISCV_32_PCREL 0x0 # CHECK-NEXT: } # CHECK: Hex dump of section '.eh_frame': # CHECK-NEXT: 0x00000000 10000000 00000000 017a5200 017c0101 diff --git a/llvm/test/MC/RISCV/invalid-attribute.s b/llvm/test/MC/RISCV/invalid-attribute.s index 1d732af83cda35..2ebf7ddc9aff85 100644 --- a/llvm/test/MC/RISCV/invalid-attribute.s +++ b/llvm/test/MC/RISCV/invalid-attribute.s @@ -33,3 +33,6 @@ .attribute arch, 30 # CHECK: [[@LINE-1]]:18: error: expected string constant + +.attribute atomic_abi, "16" +# CHECK: [[@LINE-1]]:24: error: expected numeric constant diff --git a/llvm/test/MC/RISCV/scoped-relaxation.s b/llvm/test/MC/RISCV/scoped-relaxation.s index 56394fd8053282..0b797ee5aca5eb 100644 --- a/llvm/test/MC/RISCV/scoped-relaxation.s +++ b/llvm/test/MC/RISCV/scoped-relaxation.s @@ -9,7 +9,7 @@ .dword function - . # CHECK: 0x0 R_RISCV_ADD64 function 0x0 -# CHECK-NEXT: 0x0 R_RISCV_SUB64 .L0 0x0 +# CHECK-NEXT: 0x0 R_RISCV_SUB64 0x0 # Relaxed reference, this will resolve to a pair of `RISCV_ADD64` and # `RISCV_SUB64` relocation. @@ -19,7 +19,7 @@ .option pop # CHECK: 0x8 R_RISCV_ADD64 function 0x0 -# CHECK-NEXT: 0x8 R_RISCV_SUB64 .L0 0x0 +# CHECK-NEXT: 0x8 R_RISCV_SUB64 0x0 # Unrelaxed reference, this will resolve to a pair of `RISCV_ADD64` and # `RISCV_SUB64` relocation due to relaxation being sticky to the file. 
@@ -29,6 +29,6 @@ .option pop # CHECK: 0x10 R_RISCV_ADD64 function 0x0 -# CHECK-NEXT: 0x10 R_RISCV_SUB64 .L0 0x0 +# CHECK-NEXT: 0x10 R_RISCV_SUB64 0x0 # CHECK: } diff --git a/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td b/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td index 7c9df02ebd87c6..fc8abc6fbc547e 100644 --- a/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td +++ b/llvm/test/TableGen/GlobalISelEmitterSkippedPatterns.td @@ -1,4 +1,6 @@ // RUN: llvm-tblgen -warn-on-skipped-patterns -gen-global-isel -I %p/../../include %s -I %p/Common -o /dev/null 2>&1 | FileCheck %s +// RUN: llvm-tblgen -warn-on-skipped-patterns -gen-global-isel -I %p/../../include %s -I %p/Common -o /dev/null -DIGNORE 2>&1 | FileCheck --allow-empty --check-prefix=IGNORED %s + include "llvm/Target/Target.td" include "GlobalISelEmitterCommon.td" @@ -23,6 +25,10 @@ def INSN : I<(outs GPR32:$dst), (ins GPR32:$src1, complex:$src2), []>; //===- Bail out when we define a variable twice wrt complex suboperands. -===// +#ifdef IGNORE +let GISelShouldIgnore = 1 in +#endif +// IGNORED-NOT: warning: Skipped pattern: Error: {{.*}} // CHECK: warning: Skipped pattern: Error: Complex suboperand x referenced by different operands: complex_rr:x:y and complex_rr:x:z. 
def : Pat<(add (complex_rr GPR32:$x, GPR32:$y), (complex_rr GPR32:$x, GPR32:$z)), diff --git a/llvm/test/TableGen/riscv-target-def.td b/llvm/test/TableGen/riscv-target-def.td index ab589b31192f39..b23c7e4d40198b 100644 --- a/llvm/test/TableGen/riscv-target-def.td +++ b/llvm/test/TableGen/riscv-target-def.td @@ -2,8 +2,9 @@ include "llvm/Target/Target.td" -class RISCVExtension implies = [], +class RISCVExtension implies = [], + string fieldname = !subst("Feature", "Has", NAME), string value = "true"> : SubtargetFeature { int MajorVersion = major; @@ -11,18 +12,36 @@ class RISCVExtension implies = [], + string fieldname = !subst("Feature", "Has", NAME), + string value = "true"> + : RISCVExtension<"experimental-"#name, major, minor, desc, implies, + fieldname, value> { + let Experimental = true; +} + def FeatureStdExtI - : RISCVExtension<"i", 2, 1, "HasStdExtI", + : RISCVExtension<"i", 2, 1, "'I' (Base Integer Instruction Set)">; def FeatureStdExtZicsr - : RISCVExtension<"zicsr", 2, 0, "HasStdExtZicsr", + : RISCVExtension<"zicsr", 2, 0, "'zicsr' (CSRs)">; def FeatureStdExtZifencei - : RISCVExtension<"zifencei", 2, 0, "HasStdExtZifencei", + : RISCVExtension<"zifencei", 2, 0, "'Zifencei' (fence.i)">; +def FeatureStdExtF + : RISCVExtension<"f", 2, 2, + "'F' (Single-Precision Floating-Point)", + [FeatureStdExtZicsr]>; + +def FeatureStdExtZidummy + : RISCVExperimentalExtension<"zidummy", 0, 1, + "Dummy">; + def Feature32Bit : SubtargetFeature<"32bit", "IsRV32", "true", "Implements RV32">; def Feature64Bit @@ -75,22 +94,49 @@ def ROCKET_RV64 : RISCVProcessorModel<"rocket-rv64", def ROCKET : RISCVTuneProcessorModel<"rocket", NoSchedModel>; -// CHECK: #ifndef PROC -// CHECK: #define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGNED_ACCESS) -// CHECK: #endif +// CHECK: #ifdef GET_SUPPORTED_EXTENSIONS +// CHECK-NEXT: #undef GET_SUPPORTED_EXTENSIONS + +// CHECK: static const RISCVSupportedExtension SupportedExtensions[] = { +// CHECK-NEXT: {"f", {2, 2}}, +// CHECK-NEXT: {"i", {2, 
1}}, +// CHECK-NEXT: {"zicsr", {2, 0}}, +// CHECK-NEXT: {"zifencei", {2, 0}}, +// CHECK-NEXT: }; + +// CHECK: static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { +// CHECK-NEXT: {"zidummy", {0, 1}}, +// CHECK-NEXT: }; + +// CHECK: #endif // GET_SUPPORTED_EXTENSIONS + +// CHECK: #ifdef GET_IMPLIED_EXTENSIONS +// CHECK-NEXT: #undef GET_IMPLIED_EXTENSIONS + +// CHECK: static const char *ImpliedExtsF[] = {"zicsr"}; + +// CHECK: static constexpr ImpliedExtsEntry ImpliedExts[] = { +// CHECK-NEXT: { {"f"}, {ImpliedExtsF} }, +// CHECK-NEXT: }; + +// CHECK: #endif // GET_IMPLIED_EXTENSIONS + +// CHECK: #ifndef PROC +// CHECK-NEXT: #define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGNED_ACCESS) +// CHECK-NEXT: #endif -// CHECK: PROC(GENERIC_RV32, {"generic-rv32"}, {"rv32i2p1"}, 0) -// CHECK: PROC(GENERIC_RV64, {"generic-rv64"}, {"rv64i2p1"}, 0) -// CHECK: PROC(ROCKET_RV32, {"rocket-rv32"}, {"rv32i2p1_zicsr2p0_zifencei2p0"}, 0) -// CHECK: PROC(ROCKET_RV64, {"rocket-rv64"}, {"rv64i2p1_zicsr2p0_zifencei2p0"}, 0) +// CHECK: PROC(GENERIC_RV32, {"generic-rv32"}, {"rv32i2p1"}, 0) +// CHECK-NEXT: PROC(GENERIC_RV64, {"generic-rv64"}, {"rv64i2p1"}, 0) +// CHECK-NEXT: PROC(ROCKET_RV32, {"rocket-rv32"}, {"rv32i2p1_zicsr2p0_zifencei2p0"}, 0) +// CHECK-NEXT: PROC(ROCKET_RV64, {"rocket-rv64"}, {"rv64i2p1_zicsr2p0_zifencei2p0"}, 0) // CHECK: #undef PROC -// CHECK: #ifndef TUNE_PROC -// CHECK: #define TUNE_PROC(ENUM, NAME) -// CHECK: #endif +// CHECK: #ifndef TUNE_PROC +// CHECK-NEXT: #define TUNE_PROC(ENUM, NAME) +// CHECK-NEXT: #endif // CHECK: TUNE_PROC(GENERIC, "generic") -// CHECK: TUNE_PROC(ROCKET, "rocket") +// CHECK-NEXT: TUNE_PROC(ROCKET, "rocket") // CHECK: #undef TUNE_PROC diff --git a/llvm/test/TableGen/simplify-patfrag.td b/llvm/test/TableGen/simplify-patfrag.td index 904c29696a6e2c..fbb6f97f286311 100644 --- a/llvm/test/TableGen/simplify-patfrag.td +++ b/llvm/test/TableGen/simplify-patfrag.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen -gen-dag-isel -I 
%p/../../include %s 2>&1 | FileCheck %s +// RUN: llvm-tblgen -gen-dag-isel -I %p/../../include -DIGNORE %s 2>&1 | FileCheck %s include "llvm/Target/Target.td" @@ -29,6 +30,10 @@ def anyconvert : PatFrags<(ops node:$src), [(bitconvert node:$src), (specialconvert node:$src)]>; +#ifdef IGNORE +// Ensure ShouldIgnore does not disable records in dag isel emitter +let GISelShouldIgnore = 1 in +#endif // And a rule that matches that PatFrag and turns it into i2f def : Pat<(f32 (anyconvert (i32 GPR:$val))), (i2f GPR:$val)>; diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll new file mode 100644 index 00000000000000..fd5a2044db48f3 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-load.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s + +; Make sure atomic loads are not bitcasted and lose metadata + +define float @load_atomic_f32_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_global_agent( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr addrspace(1) %ptr syncscope("agent") 
seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_flat_system(ptr %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_flat_system( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define float @load_atomic_f32_flat_agent(ptr %ptr) { +; CHECK-LABEL: define float @load_atomic_f32_flat_agent( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CHECK-NEXT: ret float [[TMP2]] +; + %ld = load atomic float, ptr %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret float %ld +} + +define half @load_atomic_f16_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define half @load_atomic_f16_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half +; CHECK-NEXT: ret half [[TMP2]] +; + %ld = load atomic half, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret half %ld +} + +define half @load_atomic_f16_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define half @load_atomic_f16_global_agent( +; CHECK-SAME: ptr 
addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half +; CHECK-NEXT: ret half [[TMP2]] +; + %ld = load atomic half, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret half %ld +} + +define half @load_atomic_f16_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define half @load_atomic_f16_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to half +; CHECK-NEXT: ret half [[TMP2]] +; + %ld = load atomic half, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0 + ret half %ld +} + +define bfloat @load_atomic_bf16_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr addrspace(1) %ptr seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define bfloat @load_atomic_bf16_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_global_agent( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define bfloat @load_atomic_bf16_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = 
load atomic i16, ptr addrspace(3) [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr addrspace(3) %ptr seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define bfloat @load_atomic_bf16_flat(ptr %ptr) { +; CHECK-LABEL: define bfloat @load_atomic_bf16_flat( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i16, ptr [[PTR]] seq_cst, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to bfloat +; CHECK-NEXT: ret bfloat [[TMP2]] +; + %ld = load atomic bfloat, ptr %ptr seq_cst, align 2, !some.unknown.md !0 + ret bfloat %ld +} + +define double @load_atomic_f64_global_system(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_global_system( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_global_agent(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_global_agent( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_local(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_local( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr addrspace(3) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: 
ret double [[TMP2]] +; + %ld = load atomic double, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_flat_system(ptr %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_flat_system( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr %ptr seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +define double @load_atomic_f64_flat_agent(ptr %ptr) { +; CHECK-LABEL: define double @load_atomic_f64_flat_agent( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i64, ptr [[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double +; CHECK-NEXT: ret double [[TMP2]] +; + %ld = load atomic double, ptr %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 + ret double %ld +} + +!0 = !{} + + diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll new file mode 100644 index 00000000000000..db0c3a20e62f48 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/no-expand-atomic-store.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck %s + +define void @store_atomic_f32_global_system(float %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_global_system( +; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} 
+ +define void @store_atomic_f32_global_agent(float %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_global_agent( +; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f32_local(float %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_local( +; CHECK-SAME: float [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f32_flat(float %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_f32_flat( +; CHECK-SAME: float [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL]] to i32 +; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic float %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_global_system(half %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f16_global_system( +; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_global_agent(half %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void 
@store_atomic_f16_global_agent( +; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_local(half %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_f16_local( +; CHECK-SAME: half [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f16_flat(half %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_f16_flat( +; CHECK-SAME: half [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic half %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_global_system(bfloat %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_global_system( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr addrspace(1) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_global_agent(bfloat %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_global_agent( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(1) [[PTR]] syncscope("agent") seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_local(bfloat %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_local( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr addrspace(3) %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} + +define void @store_atomic_bf16_flat(bfloat %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_bf16_flat( +; CHECK-SAME: bfloat [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat [[VAL]] to i16 +; CHECK-NEXT: store atomic i16 [[TMP1]], ptr [[PTR]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic bfloat %val, ptr %ptr seq_cst, align 4, !some.unknown.md !0 + ret void +} +define void @store_atomic_f64_global_system(double %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_global_system( +; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(1) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr addrspace(1) %ptr seq_cst, align 8, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f64_global_agent(double %val, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_global_agent( +; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(1) 
[[PTR]] syncscope("agent") seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr addrspace(1) %ptr syncscope("agent") seq_cst, align 8, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f64_local(double %val, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_local( +; CHECK-SAME: double [[VAL:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr addrspace(3) [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr addrspace(3) %ptr seq_cst, align 8, !some.unknown.md !0 + ret void +} + +define void @store_atomic_f64_flat(double %val, ptr %ptr) { +; CHECK-LABEL: define void @store_atomic_f64_flat( +; CHECK-SAME: double [[VAL:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL]] to i64 +; CHECK-NEXT: store atomic i64 [[TMP1]], ptr [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; + store atomic double %val, ptr %ptr seq_cst, align 8, !some.unknown.md !0 + ret void +} + +!0 = !{} diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/noop-copy-sink.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/noop-copy-sink.ll new file mode 100644 index 00000000000000..55cde6c1431fe3 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/noop-copy-sink.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes='require,function(codegenprepare)' -mtriple=riscv64 %s \ +; RUN: | FileCheck --check-prefixes=CHECK %s + +define i16 @sink_trunc1(i64 %a) { +; CHECK-LABEL: @sink_trunc1( +; CHECK-NEXT: fnend: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[A:%.*]] to i16 +; CHECK-NEXT: ret i16 [[TMP0]] +; + %trunc = trunc i64 %a to i16 + br label %fnend + +fnend: + ret i16 %trunc +} + +; The flags on the original trunc should be preserved. 
+define i16 @sink_trunc2(i64 %a) { +; CHECK-LABEL: @sink_trunc2( +; CHECK-NEXT: fnend: +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[A:%.*]] to i16 +; CHECK-NEXT: ret i16 [[TMP0]] +; + %trunc = trunc nuw nsw i64 %a to i16 + br label %fnend + +fnend: + ret i16 %trunc +} diff --git a/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll b/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll index b4c24715037bca..d0095231a30f93 100644 --- a/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll +++ b/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll @@ -1,22 +1,22 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; ; RUN: opt -passes="ipsccp" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC ; RUN: opt -passes="ipsccp" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-iterations=16 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) { -; FUNCSPEC-LABEL: define i64 @bar( +; FUNCSPEC-LABEL: define range(i64 4, 13) i64 @bar( ; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { ; FUNCSPEC-NEXT: entry: -; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]] -; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]] +; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 
[[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) +; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) ; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]] ; FUNCSPEC-NEXT: ret i64 [[ADD]] ; -; NOFUNCSPEC-LABEL: define i64 @bar( +; NOFUNCSPEC-LABEL: define range(i64 4, 13) i64 @bar( ; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { ; NOFUNCSPEC-NEXT: entry: -; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]] -; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]] +; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) +; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]) ; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]] ; NOFUNCSPEC-NEXT: ret i64 [[ADD]] ; @@ -28,6 +28,50 @@ entry: } define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) { +; NOFUNCSPEC-LABEL: define internal range(i64 2, 7) i64 @foo( +; NOFUNCSPEC-SAME: i64 [[N:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { +; NOFUNCSPEC-NEXT: entry: +; NOFUNCSPEC-NEXT: br i1 [[C1]], label [[L1:%.*]], label [[L9:%.*]] +; NOFUNCSPEC: l1: +; 
NOFUNCSPEC-NEXT: [[PHI1:%.*]] = phi i64 [ [[N]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[L2:%.*]] ] +; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI1]], 1 +; NOFUNCSPEC-NEXT: br i1 [[C2]], label [[L1_5:%.*]], label [[EXIT:%.*]] +; NOFUNCSPEC: l1_5: +; NOFUNCSPEC-NEXT: br i1 [[C3]], label [[L1_75:%.*]], label [[L6:%.*]] +; NOFUNCSPEC: l1_75: +; NOFUNCSPEC-NEXT: br i1 [[C4]], label [[L2]], label [[L3:%.*]] +; NOFUNCSPEC: l2: +; NOFUNCSPEC-NEXT: [[PHI2]] = phi i64 [ [[PHI1]], [[L1_75]] ], [ [[PHI3:%.*]], [[L3]] ] +; NOFUNCSPEC-NEXT: br label [[L1]] +; NOFUNCSPEC: l3: +; NOFUNCSPEC-NEXT: [[PHI3]] = phi i64 [ [[PHI1]], [[L1_75]] ], [ [[PHI4:%.*]], [[L4:%.*]] ] +; NOFUNCSPEC-NEXT: br label [[L2]] +; NOFUNCSPEC: l4: +; NOFUNCSPEC-NEXT: [[PHI4]] = phi i64 [ [[PHI5:%.*]], [[L5:%.*]] ], [ [[PHI6:%.*]], [[L6]] ] +; NOFUNCSPEC-NEXT: br i1 [[C5]], label [[L3]], label [[L6]] +; NOFUNCSPEC: l5: +; NOFUNCSPEC-NEXT: [[PHI5]] = phi i64 [ [[PHI6]], [[L6_5:%.*]] ], [ [[PHI7:%.*]], [[L7:%.*]] ] +; NOFUNCSPEC-NEXT: br label [[L4]] +; NOFUNCSPEC: l6: +; NOFUNCSPEC-NEXT: [[PHI6]] = phi i64 [ [[PHI4]], [[L4]] ], [ [[PHI1]], [[L1_5]] ] +; NOFUNCSPEC-NEXT: br i1 [[C6]], label [[L4]], label [[L6_5]] +; NOFUNCSPEC: l6_5: +; NOFUNCSPEC-NEXT: br i1 [[C7]], label [[L5]], label [[L8:%.*]] +; NOFUNCSPEC: l7: +; NOFUNCSPEC-NEXT: [[PHI7]] = phi i64 [ [[PHI9:%.*]], [[L9]] ], [ [[PHI8:%.*]], [[L8]] ] +; NOFUNCSPEC-NEXT: br i1 [[C8]], label [[L5]], label [[L8]] +; NOFUNCSPEC: l8: +; NOFUNCSPEC-NEXT: [[PHI8]] = phi i64 [ [[PHI6]], [[L6_5]] ], [ [[PHI7]], [[L7]] ] +; NOFUNCSPEC-NEXT: br i1 [[C9]], label [[L7]], label [[L9]] +; NOFUNCSPEC: l9: +; NOFUNCSPEC-NEXT: [[PHI9]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[PHI8]], [[L8]] ] +; NOFUNCSPEC-NEXT: [[SUB:%.*]] = sub nuw nsw i64 [[PHI9]], 1 +; NOFUNCSPEC-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[SUB]], 2 +; NOFUNCSPEC-NEXT: br i1 [[C10]], label [[L7]], label [[EXIT]] +; NOFUNCSPEC: exit: +; NOFUNCSPEC-NEXT: [[RES:%.*]] = phi i64 [ 2, [[L1]] ], [ [[MUL]], [[L9]] ] 
+; NOFUNCSPEC-NEXT: ret i64 [[RES]] +; entry: br i1 %c1, label %l1, label %l9 diff --git a/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll b/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll index b9481baae60b9e..a576d9aa32e140 100644 --- a/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/global-var-constants.ll @@ -49,10 +49,10 @@ entry: ; Check if specialisation on the address of a non-const global variable ; is not allowed, then it is not performed. -; NO-GLOBALS-LABEL: define internal i32 @g() +; NO-GLOBALS-LABEL: define internal range(i32 -2147483646, -2147483648) i32 @g() ; NO-GLOBALS: call i32 @f(ptr @G) -; NO-GLOBALS-LABEL: define i32 @h0(ptr %p) +; NO-GLOBALS-LABEL: define range(i32 -2147483646, -2147483648) i32 @h0(ptr %p) ; NO-GLOBALS:call i32 @g() ; NO-GLOBALS-LABEL: define i32 @h1() @@ -64,10 +64,10 @@ entry: ; Check if specialisation on the address of a non-const global variable ; is allowed, then it is performed where possible. 
-; GLOBALS-LABEL: define internal i32 @g() +; GLOBALS-LABEL: define internal range(i32 -2147483646, -2147483648) i32 @g() ; GLOBALS: call i32 @f.specialized.2() -; GLOBALS-LABEL: define i32 @h0(ptr %p) +; GLOBALS-LABEL: define range(i32 -2147483646, -2147483648) i32 @h0(ptr %p) ; GLOBALS: call i32 @g() ; GLOBALS-LABEL: define i32 @h1() diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll index f107ffe0ec7ebf..3eae3dc261fb2a 100644 --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -71,10 +71,10 @@ entry: ; CHECK-LIT-LABEL: define i32 @f1 ; CHECK-LIT: call i32 @neg.specialized.[[#B:]] -; CHECK-LIT-LABEL: define i32 @g0 +; CHECK-LIT-LABEL: define range(i32 -2147483647, -2147483648) i32 @g0 ; CHECK-LIT: call i32 @add.specialized.[[#C:]] -; CHECK-LIT-LABEL: define i32 @g1 +; CHECK-LIT-LABEL: define range(i32 -2147483647, -2147483648) i32 @g1 ; CHECK-LIT: call i32 @add.specialized.[[#D:]] ; CHECK-LIT-LABEL: define float @h0 diff --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll index 236821d8ba4c02..f439d4da6080c4 100644 --- a/llvm/test/Transforms/InstCombine/array.ll +++ b/llvm/test/Transforms/InstCombine/array.ll @@ -108,3 +108,163 @@ entry: store i32 %b, ptr %gep ret void } + +define ptr @gep_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + 
%a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_add_nsw_not_nonneg1(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_not_nonneg1( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_add_nsw_not_nonneg2(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_not_nonneg2( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_not_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_not_inbounds_add_nsw_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] 
= getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add nsw i64 %a, %b + %gep = getelementptr i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_add_not_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @gep_inbounds_add_not_nsw_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i64 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[B_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]] +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i64 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %b.nneg = icmp sgt i64 %b, -1 + call void @llvm.assume(i1 %b.nneg) + %add = add i64 %a, %b + %gep = getelementptr inbounds i32, ptr %ptr, i64 %add + ret ptr %gep +} + +define ptr @gep_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_inbounds_sext_add_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + +define ptr @gep_inbounds_sext_add_not_nonneg_1(ptr %ptr, i32 %a) { +; 
CHECK-LABEL: define ptr @gep_inbounds_sext_add_not_nonneg_1( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i32 %a, -10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + +define ptr @gep_inbounds_sext_add_not_nonneg_2(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_inbounds_sext_add_not_nonneg_2( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %add = add nsw i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx + ret ptr %gep +} + +define ptr @gep_not_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) { +; CHECK-LABEL: define ptr @gep_not_inbounds_sext_add_nonneg( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[A]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[TMP1]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP2]], i64 40 +; CHECK-NEXT: ret ptr [[GEP]] +; + %a.nneg = icmp sgt i32 %a, -1 + call void @llvm.assume(i1 %a.nneg) + %add = add nsw i32 %a, 10 + %idx = sext i32 %add to i64 + %gep = getelementptr i32, ptr %ptr, i64 %idx + ret ptr %gep +} diff --git a/llvm/test/Transforms/InstCombine/fneg.ll 
b/llvm/test/Transforms/InstCombine/fneg.ll index 7c9289c447113f..3c4088832feaaa 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -980,7 +980,7 @@ define float @fneg_ldexp_contract(float %x, i32 %n) { define float @fneg_ldexp_metadata(float %x, i32 %n) { ; CHECK-LABEL: @fneg_ldexp_metadata( ; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[X:%.*]] -; CHECK-NEXT: [[NEG:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[N:%.*]]), !arst !0 +; CHECK-NEXT: [[NEG:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[N:%.*]]), !arst [[META0:![0-9]+]] ; CHECK-NEXT: ret float [[NEG]] ; %ldexp = call float @llvm.ldexp.f32.i32(float %x, i32 %n), !arst !0 @@ -988,4 +988,125 @@ define float @fneg_ldexp_metadata(float %x, i32 %n) { ret float %neg } +define float @test_fneg_select_constants(i1 %cond) { +; CHECK-LABEL: @test_fneg_select_constants( +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float 0.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float -0.0 + %neg = fneg float %sel1 + ret float %neg +} + +define <2 x float> @test_fneg_vec(<2 x i1> %cond) { +; CHECK-LABEL: @test_fneg_vec( +; CHECK-NEXT: [[NEG:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> , <2 x float> +; CHECK-NEXT: ret <2 x float> [[NEG]] +; + %sel1 = select <2 x i1> %cond, <2 x float> , <2 x float> + %neg = fneg <2 x float> %sel1 + ret <2 x float> %neg +} + +define float @test_fneg_select_var_constant(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_var_constant( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float [[X_NEG]], float 0.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float %x, float -0.0 + %neg = fneg float %sel1 + ret float %neg +} + +; nsz can be preserved. 
+ +define float @test_fneg_select_var_constant_fmf1(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_var_constant_fmf1( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select nnan ninf nsz i1 [[COND:%.*]], float [[X_NEG]], float -1.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select nnan ninf nsz i1 %cond, float %x, float 1.0 + %neg = fneg float %sel1 + ret float %neg +} + +define float @test_fneg_select_var_constant_fmf2(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_var_constant_fmf2( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg nnan ninf nsz float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select nnan ninf nsz i1 [[COND:%.*]], float [[X_NEG]], float -1.000000e+00 +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float %x, float 1.0 + %neg = fneg nnan ninf nsz float %sel1 + ret float %neg +} + +define float @test_fneg_select_constant_var(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_constant_var( +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[X_NEG]] +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float %x + %neg = fneg float %sel1 + ret float %neg +} + +; Make sure nabs is generated. 
+ +define float @test_fneg_select_abs(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_abs( +; CHECK-NEXT: [[ABSX:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[ABSX_NEG:%.*]] = fneg float [[ABSX]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[ABSX_NEG]] +; CHECK-NEXT: ret float [[NEG]] +; + %absx = call float @llvm.fabs.f32(float %x) + %sel1 = select i1 %cond, float 0.0, float %absx + %neg = fneg float %sel1 + ret float %neg +} + +define float @test_fneg_fabs_select(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_fabs_select( +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[NEG:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[DOTNEG]] +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float %x + %abs = call float @llvm.fabs.f32(float %sel1) + %neg = fneg float %abs + ret float %neg +} + +define float @test_fneg_select_constant_var_multiuse(i1 %cond, float %x) { +; CHECK-LABEL: @test_fneg_select_constant_var_multiuse( +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[X:%.*]] +; CHECK-NEXT: call void @use(float [[SEL1]]) +; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SEL1]] +; CHECK-NEXT: ret float [[NEG]] +; + %sel1 = select i1 %cond, float 0.0, float %x + call void @use(float %sel1) + %neg = fneg float %sel1 + ret float %neg +} + +; Don't break fmax idioms. 
+ +define float @test_fneg_select_maxnum(float %x) { +; CHECK-LABEL: @test_fneg_select_maxnum( +; CHECK-NEXT: [[SEL1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SEL1]] +; CHECK-NEXT: ret float [[NEG]] +; + %cmp1 = fcmp ogt float %x, 1.0 + %sel1 = select nnan nsz i1 %cmp1, float %x, float 1.0 + %neg = fneg float %sel1 + ret float %neg +} + !0 = !{} diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll new file mode 100644 index 00000000000000..7b7c6fba699c21 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll @@ -0,0 +1,292 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +declare void @use64(i64) +declare void @useptr(ptr) + +define ptr @test_zero(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_zero( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_nonzero(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_nonzero( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 4 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 2 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_or_disjoint(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_or_disjoint( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, 
ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = or disjoint i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_zero_multiuse_index(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_zero_multiuse_index( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1 +; CHECK-NEXT: call void @use64(i64 [[INDEX]]) +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 1 + call void @use64(i64 %index) + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_zero_multiuse_ptr(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_zero_multiuse_ptr( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: call void @useptr(ptr [[P1]]) +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + call void @useptr(ptr %p1) + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) { +; CHECK-LABEL: define ptr @test_zero_sext_add_nsw( +; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add nsw i32 %a, 1 + %p2 = getelementptr i32, ptr %p1, i32 %index + ret ptr %p2 +} + +define ptr @test_zero_trunc_add(ptr %base, 
i128 %a) { +; CHECK-LABEL: define ptr @test_zero_trunc_add( +; CHECK-SAME: ptr [[BASE:%.*]], i128 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[A]] to i64 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i128 %a, 1 + %p2 = getelementptr i32, ptr %p1, i128 %index + ret ptr %p2 +} + +define ptr @test_non_i8(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_non_i8( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: ret ptr [[TMP0]] +; +entry: + %p1 = getelementptr i16, ptr %base, i64 -4 + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_non_const(ptr %base, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @test_non_const( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 %b + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_too_many_indices(ptr %base, i64 %a, i64 %b) { +; CHECK-LABEL: define ptr @test_too_many_indices( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[B]] +; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 1, i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 %b + %index = add i64 %a, 
1 + %p2 = getelementptr [8 x i32], ptr %p1, i64 1, i64 %index + ret ptr %p2 +} + +define ptr @test_wrong_op(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_wrong_op( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = xor i64 [[A]], 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = xor i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_sext_add_without_nsw(ptr %base, i32 %a) { +; CHECK-LABEL: define ptr @test_sext_add_without_nsw( +; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = add i32 [[A]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i32 %a, 1 + %p2 = getelementptr i32, ptr %p1, i32 %index + ret ptr %p2 +} + +define ptr @test_or_without_disjoint(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_or_without_disjoint( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = or i64 [[A]], 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = or i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_smul_overflow(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_smul_overflow( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 
-12 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: ret ptr [[TMP0]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 9223372036854775806 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_sadd_overflow(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_sadd_overflow( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -9223372036854775808 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: ret ptr [[TMP0]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 9223372036854775804 + %index = add i64 %a, 1 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_nonzero_multiuse_index(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_nonzero_multiuse_index( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 2 +; CHECK-NEXT: call void @use64(i64 [[INDEX]]) +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 2 + call void @use64(i64 %index) + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_nonzero_multiuse_ptr(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_nonzero_multiuse_ptr( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: call void @useptr(ptr [[P1]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + call void @useptr(ptr %p1) + %index = add i64 
%a, 2 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_scalable(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_scalable( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P1]], i64 [[A]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[TMP0]], i64 1 +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 -4 + %index = add i64 %a, 1 + %p2 = getelementptr <vscale x 4 x i32>, ptr %p1, i64 %index + ret ptr %p2 +} diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll index a7d01b4f824db0..e4fb7764ba9e53 100644 --- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll +++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll @@ -364,8 +364,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs(i1 %cond, define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_ninf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[FABS]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0xFFF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -376,8 +376,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs(i1 %cond, f define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[FABS]]
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -400,8 +400,8 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives__fabs_ define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_no_positives__fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives__fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[FABS]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -435,8 +435,8 @@ define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_na define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: ret float [[X_NEG]] ; %select = select i1 %cond, float %x, float 0xFFF0000000000000 %fneg = fneg float %select @@ -447,8 +447,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__fneg_select_ninf_rhs(i1 %cond, f define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___fneg_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_nonegatives_noinf___fneg_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[X_NEG:%.*]] 
= fneg float [[X]] +; CHECK-NEXT: ret float [[X_NEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fneg = fneg float %select @@ -459,8 +459,8 @@ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___fneg_select_ninf_lhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf nzero nsub nnorm) float @ret_nofpclass_nonegatives_noinf___fneg_select_ninf_lhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[X]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: ret float [[X_NEG]] ; %select = select i1 %cond, float 0xFFF0000000000000, float %x %fneg = fneg float %select @@ -470,8 +470,8 @@ define nofpclass(inf nnorm nsub nzero) float @ret_nofpclass_nonegatives_noinf___ define nofpclass(pzero psub pnorm pinf) float @ret_nofpclass_nopositives___fneg_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives___fneg_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[X]], float 0x7FF0000000000000 -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[SELECT]] +; CHECK-NEXT: [[X_NEG:%.*]] = fneg float [[X]] +; CHECK-NEXT: [[FNEG:%.*]] = select i1 [[COND]], float [[X_NEG]], float 0xFFF0000000000000 ; CHECK-NEXT: ret float [[FNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -483,9 +483,9 @@ define nofpclass(pzero psub pnorm pinf) float @ret_nofpclass_nopositives___fneg_ define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[FNEG:%.*]] = fneg float 
[[FABS]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: ret float [[DOTNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -497,9 +497,9 @@ define nofpclass(inf) float @ret_nofpclass_inf__fneg_fabs_select_pinf_rhs(i1 %co define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives__fneg_fabs_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @ret_nofpclass_nonegatives__fneg_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[FNEG:%.*]] = fneg float [[FABS]] -; CHECK-NEXT: ret float [[FNEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: ret float [[DOTNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs = call float @llvm.fabs.f32(float %select) @@ -535,8 +535,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_unknown_select_pinf_rhs define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rhs(i1 %cond, float %x) { ; CHECK-LABEL: define nofpclass(inf) float @ret_nofpclass_inf__copysign_positive_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %copysign = call float @llvm.copysign.f32(float %select, float 1.0) @@ -547,8 +547,8 @@ define nofpclass(inf) float @ret_nofpclass_inf__copysign_negative_select_pinf_rh ; CHECK-LABEL: define nofpclass(inf) float 
@ret_nofpclass_inf__copysign_negative_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] -; CHECK-NEXT: ret float [[COPYSIGN]] +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: ret float [[DOTNEG]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %copysign = call float @llvm.copysign.f32(float %select, float -1.0) @@ -627,8 +627,8 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_nonegatives_non define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_nopositives__copysign_fabs_select_pinf_rhs(i1 %cond, float %x, float %sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_nopositives__copysign_fabs_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[SIGN:%.*]]) { -; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: ret float [[COPYSIGN]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: ret float [[TMP1]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 %fabs.sign = call float @llvm.fabs.f32(float %sign) @@ -678,9 +678,9 @@ define nofpclass(ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives__copys define nofpclass(pinf pnorm psub pzero) float @ret_nofpclass_no_positives__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @ret_nofpclass_no_positives__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND]], float [[TMP2]], float 0x7FF0000000000000 -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: 
[[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = select i1 [[COND]], float [[DOTNEG]], float 0xFFF0000000000000 ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 @@ -705,9 +705,9 @@ define nofpclass(nan ninf nnorm nsub nzero) float @ret_nofpclass_no_negatives_no define nofpclass(nan pinf pnorm psub pzero) float @ret_nofpclass_no_positives_nonan__copysign_unknown_select_pinf_rhs(i1 %cond, float %x, float %unknown.sign) { ; CHECK-LABEL: define nofpclass(nan pinf pzero psub pnorm) float @ret_nofpclass_no_positives_nonan__copysign_unknown_select_pinf_rhs ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], float [[UNKNOWN_SIGN:%.*]]) { -; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.fabs.f32(float [[X]]) -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND]], float [[TMP2]], float 0x7FF0000000000000 -; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[DOTNEG:%.*]] = fneg float [[TMP1]] +; CHECK-NEXT: [[COPYSIGN:%.*]] = select i1 [[COND]], float [[DOTNEG]], float 0xFFF0000000000000 ; CHECK-NEXT: ret float [[COPYSIGN]] ; %select = select i1 %cond, float %x, float 0x7FF0000000000000 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 56dd9c6a879981..32ed4a787e9262 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -1123,7 +1123,8 @@ define i64 @test58(ptr %foo, i64 %i, i64 %j) { define i64 @test59(ptr %foo, i64 %i) { ; CHECK-LABEL: @test59( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], ptr [[FOO:%.*]], i64 0, i64 42, i64 [[I:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[FOO:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4200 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4200 ; CHECK-NEXT: store ptr [[GEP1]], ptr 
@dummy_global1, align 8 ; CHECK-NEXT: store ptr [[GEP2]], ptr @dummy_global2, align 8 @@ -1142,13 +1143,12 @@ define i64 @test59(ptr %foo, i64 %i) { define i64 @test60(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test60( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [100 x [100 x i8]], ptr [[FOO:%.*]], i64 0, i64 [[J:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4200 -; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint ptr [[GEP1]] to i64 -; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint ptr [[GEP2]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[J:%.*]], 100 +; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add nsw i64 [[GEP1_IDX]], [[I:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 [[GEP1_OFFS]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = add nsw i64 [[GEP1_OFFS]], -4200 ; CHECK-NEXT: store ptr [[GEP1]], ptr @dummy_global1, align 8 -; CHECK-NEXT: ret i64 [[SUB]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; ; gep1 has a non-constant index and more than one uses. Shouldn't duplicate the arithmetic. 
%gep1 = getelementptr inbounds [100 x [100 x i8]], ptr %foo, i64 0, i64 %j, i64 %i @@ -1162,13 +1162,12 @@ define i64 @test60(ptr %foo, i64 %i, i64 %j) { define i64 @test61(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test61( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 4200 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [100 x [100 x i8]], ptr [[FOO]], i64 0, i64 [[J:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[CAST1:%.*]] = ptrtoint ptr [[GEP1]] to i64 -; CHECK-NEXT: [[CAST2:%.*]] = ptrtoint ptr [[GEP2]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CAST1]], [[CAST2]] +; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[J:%.*]], 100 +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add nsw i64 [[GEP2_IDX]], [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 [[GEP2_OFFS]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 4200, [[GEP2_OFFS]] ; CHECK-NEXT: store ptr [[GEP2]], ptr @dummy_global2, align 8 -; CHECK-NEXT: ret i64 [[SUB]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; ; gep2 has a non-constant index and more than one uses. Shouldn't duplicate the arithmetic. 
%gep1 = getelementptr inbounds [100 x [100 x i8]], ptr %foo, i64 0, i64 42, i64 0 @@ -1186,11 +1185,8 @@ define i64 @test_sub_ptradd_multiuse(ptr %p, i64 %idx1, i64 %idx2) { ; CHECK-LABEL: @test_sub_ptradd_multiuse( ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[IDX1:%.*]] ; CHECK-NEXT: call void @use.ptr(ptr [[P1]]) -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX2:%.*]] -; CHECK-NEXT: [[P1_INT:%.*]] = ptrtoint ptr [[P1]] to i64 -; CHECK-NEXT: [[P2_INT:%.*]] = ptrtoint ptr [[P2]] to i64 -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[P1_INT]], [[P2_INT]] -; CHECK-NEXT: ret i64 [[SUB]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[IDX1]], [[IDX2:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds i8, ptr %p, i64 %idx1 call void @use.ptr(ptr %p1) diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll b/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll index 4c88f4acc12f16..2795333effd76b 100644 --- a/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll +++ b/llvm/test/Transforms/LowerTypeTests/cfi-nounwind-direct-call.ll @@ -109,8 +109,8 @@ attributes #6 = { noreturn nounwind } !11 = !{} !12 = !{!"branch_weights", i32 1048575, i32 1} ; CHECK: Function Attrs: minsize mustprogress nofree norecurse nosync nounwind optsize willreturn memory(none) -; CHECK-LABEL: define dso_local noundef i32 @_Z9nothrow_ei -; CHECK-SAME: (i32 noundef [[NUM:%.*]]) #[[ATTR0:[0-9]+]] !type !4 !type !5 !type !6 { +; CHECK-LABEL: define dso_local noundef range(i32 0, 2) i32 @_Z9nothrow_ei +; CHECK-SAME: (i32 noundef [[NUM:%.*]]) #[[ATTR0:[0-9]+]] !type [[META4:![0-9]+]] !type [[META5:![0-9]+]] !type [[META6:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ne i32 [[NUM]], 0 ; CHECK-NEXT: [[DOT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 @@ -118,8 +118,8 @@ attributes #6 = { noreturn nounwind } ; ; ; CHECK: Function Attrs: minsize mustprogress nofree norecurse 
nosync nounwind optsize willreturn memory(write, argmem: none, inaccessiblemem: none) -; CHECK-LABEL: define dso_local noundef i32 @_Z10call_catchi -; CHECK-SAME: (i32 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !type !4 !type !5 !type !6 { +; CHECK-LABEL: define dso_local noundef range(i32 0, 2) i32 @_Z10call_catchi +; CHECK-SAME: (i32 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !type [[META4]] !type [[META5]] !type [[META6]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store ptr @_Z9nothrow_ei.cfi_jt, ptr @catch_ptr, align 8, !tbaa [[TBAA7:![0-9]+]] ; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp ne i32 [[NUM]], 0 @@ -131,17 +131,17 @@ attributes #6 = { noreturn nounwind } ; CHECK-LABEL: define weak_odr hidden void @__cfi_check_fail ; CHECK-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq ptr [[TMP0]], null, !nosanitize !11 -; CHECK-NEXT: br i1 [[DOTNOT]], label [[TRAP:%.*]], label [[CONT:%.*]], !nosanitize !11 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq ptr [[TMP0]], null, !nosanitize [[META11:![0-9]+]] +; CHECK-NEXT: br i1 [[DOTNOT]], label [[TRAP:%.*]], label [[CONT:%.*]], !nosanitize [[META11]] ; CHECK: trap: -; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR5:[0-9]+]], !nosanitize !11 -; CHECK-NEXT: unreachable, !nosanitize !11 +; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 2) #[[ATTR6:[0-9]+]], !nosanitize [[META11]] +; CHECK-NEXT: unreachable, !nosanitize [[META11]] ; CHECK: cont: -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 4, !nosanitize !11 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 4, !nosanitize [[META11]] ; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i8 [[TMP2]], 5 ; CHECK-NEXT: br i1 [[SWITCH]], label [[TRAP]], label [[CONT6:%.*]] ; CHECK: cont6: -; CHECK-NEXT: ret void, !nosanitize !11 +; CHECK-NEXT: ret void, !nosanitize [[META11]] ; ; ; CHECK-LABEL: define weak void @__cfi_check @@ -153,8 +153,8 @@ attributes #6 
= { noreturn nounwind } ; ; CHECK: Function Attrs: naked nocf_check noinline nounwind ; CHECK-LABEL: define internal void @_Z9nothrow_ei.cfi_jt -; CHECK-SAME: () #[[ATTR4:[0-9]+]] align 8 { +; CHECK-SAME: () #[[ATTR5:[0-9]+]] align 8 { ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(ptr nonnull @_Z9nothrow_ei) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: tail call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(ptr nonnull @_Z9nothrow_ei) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll index 9206893cb2341e..c133852f66937d 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll @@ -7,7 +7,7 @@ target triple = "aarch64" ; Check that the function gets vectorized. define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) { -; CHECK-LABEL: define i32 @quant_4x4 +; CHECK-LABEL: define range(i32 0, 2) i32 @quant_4x4 ; CHECK-SAME: (ptr nocapture noundef [[DCT:%.*]], ptr nocapture noundef readonly [[MF:%.*]], ptr nocapture noundef readonly [[BIAS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DCT]], i64 32 diff --git a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll index 67d721b23d6f00..35d5ceeb91950f 100644 --- a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll +++ b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll @@ -2,7 +2,7 @@ ; RUN: opt -O1 -S < %s | FileCheck %s define i32 @testa(i32 %mul) { -; CHECK-LABEL: define i32 @testa( +; CHECK-LABEL: define range(i32 -65536, 65536) i32 @testa( ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: 
[[SHR:%.*]] = ashr i32 [[MUL]], 15 ; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767) @@ -16,7 +16,7 @@ define i32 @testa(i32 %mul) { } define i32 @testb(i32 %mul) { -; CHECK-LABEL: define i32 @testb( +; CHECK-LABEL: define range(i32 -16777216, 16777216) i32 @testb( ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[SHR102:%.*]] = ashr i32 [[MUL]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128) diff --git a/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll index fb338a6507eba8..63cfef6f3d09ae 100644 --- a/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll +++ b/llvm/test/Transforms/PhaseOrdering/min_max_loop.ll @@ -19,7 +19,7 @@ ;; } define i16 @vecreduce_smin_v2i16(i32 %n, ptr %v) { -; CHECK-LABEL: define i16 @vecreduce_smin_v2i16( +; CHECK-LABEL: define range(i16 -32768, 1) i16 @vecreduce_smin_v2i16( ; CHECK: @llvm.smin.v2i16 entry: @@ -65,7 +65,7 @@ for.end: ; preds = %for.cond } define i16 @vecreduce_smax_v2i16(i32 %n, ptr %v) { -; CHECK-LABEL: define i16 @vecreduce_smax_v2i16( +; CHECK-LABEL: define range(i16 0, -32768) i16 @vecreduce_smax_v2i16( ; CHECK: @llvm.smax.v2i16 entry: diff --git a/llvm/test/Transforms/SCCP/and-add-shl.ll b/llvm/test/Transforms/SCCP/and-add-shl.ll index 7c037ffa6bf640..7af563f13a18ab 100644 --- a/llvm/test/Transforms/SCCP/and-add-shl.ll +++ b/llvm/test/Transforms/SCCP/and-add-shl.ll @@ -59,7 +59,7 @@ define i8 @and_not_shl_1(i8 %x) { ; Negative test: https://alive2.llvm.org/ce/z/Zv4Pyu define i8 @and_add_shl_overlap(i8 %x) { -; CHECK-LABEL: define i8 @and_add_shl_overlap +; CHECK-LABEL: define range(i8 0, 33) i8 @and_add_shl_overlap ; CHECK-SAME: (i8 [[X:%.*]]) { ; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ule i8 [[X]], 6 ; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]]) @@ -77,7 +77,7 @@ define i8 @and_add_shl_overlap(i8 %x) { } define i8 @and_not_shl_overlap(i8 %x) { -; 
CHECK-LABEL: define i8 @and_not_shl_overlap +; CHECK-LABEL: define range(i8 0, 5) i8 @and_not_shl_overlap ; CHECK-SAME: (i8 [[X:%.*]]) { ; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ule i8 [[X]], 3 ; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]]) diff --git a/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll b/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll index 64c1b9020a054f..c24c554102ddf8 100644 --- a/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll +++ b/llvm/test/Transforms/SCCP/ip-add-range-to-call.ll @@ -1,20 +1,21 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=ipsccp -S %s | FileCheck %s ; Test 1. ; Both arguments and return value of @callee can be tracked. The inferred range ; can be added to call sites. define internal i32 @callee(i32 %x) { -; CHECK-LABEL: @callee( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-LABEL: define internal range(i32 0, 21) i32 @callee( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: ret i32 [[X]] ; ret i32 %x } define i32 @caller1() { -; CHECK-LABEL: @caller1( -; CHECK-NEXT: [[C1:%.*]] = call i32 @callee(i32 10), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[C2:%.*]] = call i32 @callee(i32 20), !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 41) i32 @caller1() { +; CHECK-NEXT: [[C1:%.*]] = call i32 @callee(i32 10) +; CHECK-NEXT: [[C2:%.*]] = call i32 @callee(i32 20) ; CHECK-NEXT: [[A:%.*]] = add nuw nsw i32 [[C1]], [[C2]] ; CHECK-NEXT: ret i32 [[A]] ; @@ -25,9 +26,10 @@ define i32 @caller1() { } define i32 @caller2(i32 %x) { -; CHECK-LABEL: @caller2( -; CHECK-NEXT: [[X_15:%.*]] = and i32 [[X:%.*]], 15 -; CHECK-NEXT: [[C:%.*]] = call i32 @callee(i32 [[X_15]]), !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 21) i32 @caller2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[X_15:%.*]] = and i32 [[X]], 15 +; CHECK-NEXT: [[C:%.*]] = call i32 @callee(i32 [[X_15]]) ; CHECK-NEXT: ret i32 [[C]] 
; %x.15 = and i32 %x, 15 @@ -43,14 +45,15 @@ define i32 @caller2(i32 %x) { declare void @use_cb1(ptr) define internal i32 @callee2(i32 %x) { -; CHECK-LABEL: @callee2( -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-LABEL: define internal i32 @callee2( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: ret i32 [[X]] ; ret i32 %x } define void @caller_cb1() { -; CHECK-LABEL: @caller_cb1( +; CHECK-LABEL: define void @caller_cb1() { ; CHECK-NEXT: [[C1:%.*]] = call i32 @callee2(i32 9) ; CHECK-NEXT: [[C2:%.*]] = call i32 @callee2(i32 10) ; CHECK-NEXT: call void @use_cb1(ptr @callee2) @@ -70,8 +73,9 @@ define void @caller_cb1() { declare void @use_cb2(ptr) define internal i32 @callee3(i32 %x) { -; CHECK-LABEL: @callee3( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], 10 +; CHECK-LABEL: define internal range(i32 500, 601) i32 @callee3( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 ; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 500, i32 600 ; CHECK-NEXT: ret i32 [[S]] ; @@ -81,9 +85,9 @@ define internal i32 @callee3(i32 %x) { } define void @caller_cb2() { -; CHECK-LABEL: @caller_cb2( -; CHECK-NEXT: [[C1:%.*]] = call i32 @callee3(i32 9), !range [[RNG1:![0-9]+]] -; CHECK-NEXT: [[C2:%.*]] = call i32 @callee3(i32 10), !range [[RNG1]] +; CHECK-LABEL: define void @caller_cb2() { +; CHECK-NEXT: [[C1:%.*]] = call i32 @callee3(i32 9) +; CHECK-NEXT: [[C2:%.*]] = call i32 @callee3(i32 10) ; CHECK-NEXT: call void @use_cb2(ptr @callee3) ; CHECK-NEXT: ret void ; @@ -100,9 +104,10 @@ define void @caller_cb2() { declare void @use_cb3(ptr) define internal i32 @callee4(i32 %x, i32 %y) { -; CHECK-LABEL: @callee4( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], 10 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 500, i32 [[Y:%.*]] +; CHECK-LABEL: define internal i32 @callee4( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], 10 +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 500, i32 [[Y]] ; CHECK-NEXT: ret i32 [[S]] ; %c = 
icmp eq i32 %x, 10 @@ -111,11 +116,9 @@ define internal i32 @callee4(i32 %x, i32 %y) { } define void @caller_cb3() { -; CHECK-LABEL: @caller_cb3( +; CHECK-LABEL: define void @caller_cb3() { ; CHECK-NEXT: [[C1:%.*]] = call i32 @callee4(i32 11, i32 30) -; CHECK-NOT: !range ; CHECK-NEXT: [[C2:%.*]] = call i32 @callee4(i32 12, i32 40) -; CHECK-NOT: !range ; CHECK-NEXT: call void @use_cb3(ptr @callee4) ; CHECK-NEXT: ret void ; @@ -129,15 +132,16 @@ define void @caller_cb3() { ; Range for the return value of callee5 includes undef. No range metadata ; should be added at call sites. define internal i32 @callee5(i32 %x, i32 %y) { -; CHECK-LABEL: @callee5( -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X:%.*]], 15 +; CHECK-LABEL: define internal i32 @callee5( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X]], 15 ; CHECK-NEXT: br i1 [[C]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[Y:%.*]], [[BB1]] ], [ undef, [[BB2]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[Y]], [[BB1]] ], [ undef, [[BB2]] ] ; CHECK-NEXT: ret i32 [[RES]] ; %c = icmp slt i32 %x, 15 @@ -155,11 +159,9 @@ exit: } define i32 @caller5() { -; CHECK-LABEL: @caller5( +; CHECK-LABEL: define range(i32 200, 401) i32 @caller5() { ; CHECK-NEXT: [[C1:%.*]] = call i32 @callee5(i32 10, i32 100) -; CHECK-NOT: !range ; CHECK-NEXT: [[C2:%.*]] = call i32 @callee5(i32 20, i32 200) -; CHECK-NOT: !range ; CHECK-NEXT: [[A:%.*]] = add i32 [[C1]], [[C2]] ; CHECK-NEXT: ret i32 [[A]] ; @@ -170,8 +172,9 @@ define i32 @caller5() { } define internal <2 x i64> @ctlz(<2 x i64> %arg) { -; CHECK-LABEL: @ctlz( -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[ARG:%.*]], i1 false) +; CHECK-LABEL: define internal range(i64 0, 65) <2 x i64> @ctlz( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> 
@llvm.ctlz.v2i64(<2 x i64> [[ARG]], i1 false) ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %arg, i1 false) @@ -179,8 +182,9 @@ define internal <2 x i64> @ctlz(<2 x i64> %arg) { } define <2 x i64> @ctlz_caller(<2 x i64> %arg) { -; CHECK-LABEL: @ctlz_caller( -; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @ctlz(<2 x i64> [[ARG:%.*]]), !range [[RNG2:![0-9]+]] +; CHECK-LABEL: define range(i64 0, 65) <2 x i64> @ctlz_caller( +; CHECK-SAME: <2 x i64> [[ARG:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @ctlz(<2 x i64> [[ARG]]) ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %res = call <2 x i64> @ctlz(<2 x i64> %arg) @@ -189,6 +193,3 @@ define <2 x i64> @ctlz_caller(<2 x i64> %arg) { declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) -; CHECK: [[RNG0]] = !{i32 0, i32 21} -; CHECK: [[RNG1]] = !{i32 500, i32 601} -; CHECK: [[RNG2]] = !{i64 0, i64 65} diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll index 80d90922c2fbdb..05fa04a9fbe06f 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -1,10 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=ipsccp -S | FileCheck %s ; x = [100, 301) define internal i1 @f.trunc(i32 %x) { -; CHECK-LABEL: @f.trunc( -; CHECK-NEXT: [[T_1:%.*]] = trunc nuw nsw i32 [[X:%.*]] to i16 +; CHECK-LABEL: define internal i1 @f.trunc( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[T_1:%.*]] = trunc nuw nsw i32 [[X]] to i16 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i16 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i16 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] @@ -43,7 +44,7 @@ define internal i1 @f.trunc(i32 %x) { } define i1 @caller1() { -; CHECK-LABEL: @caller1( +; CHECK-LABEL: define i1 @caller1() { ; CHECK-NEXT: 
[[CALL_1:%.*]] = tail call i1 @f.trunc(i32 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.trunc(i32 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -58,14 +59,15 @@ define i1 @caller1() { ; x = [100, 301) define internal i1 @f.zext(i32 %x, i32 %y) { -; CHECK-LABEL: @f.zext( -; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X:%.*]] to i64 +; CHECK-LABEL: define internal i1 @f.zext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X]] to i64 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] ; CHECK-NEXT: [[RES_2:%.*]] = add nuw nsw i1 [[RES_1]], false ; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] -; CHECK-NEXT: [[T_2:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[T_2:%.*]] = zext i32 [[Y]] to i64 ; CHECK-NEXT: [[C_5:%.*]] = icmp sgt i64 [[T_2]], 300 ; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 299 ; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], 1 @@ -97,7 +99,7 @@ define internal i1 @f.zext(i32 %x, i32 %y) { } define i1 @caller.zext() { -; CHECK-LABEL: @caller.zext( +; CHECK-LABEL: define i1 @caller.zext() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.zext(i32 100, i32 -120) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.zext(i32 300, i32 900) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -111,14 +113,15 @@ define i1 @caller.zext() { ; x = [100, 301) define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-LABEL: @f.sext( -; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X:%.*]] to i64 +; CHECK-LABEL: define internal i1 @f.sext( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[T_1:%.*]] = zext nneg i32 [[X]] to i64 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 ; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 ; CHECK-NEXT: [[RES_1:%.*]] = add nuw nsw i1 false, [[C_2]] ; CHECK-NEXT: [[RES_2:%.*]] = add nuw nsw i1 [[RES_1]], false ; 
CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] -; CHECK-NEXT: [[T_2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[T_2:%.*]] = sext i32 [[Y]] to i64 ; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 899 ; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], -119 ; CHECK-NEXT: [[RES_4:%.*]] = add nuw nsw i1 [[RES_3]], false @@ -148,7 +151,7 @@ define internal i1 @f.sext(i32 %x, i32 %y) { } define i1 @caller.sext() { -; CHECK-LABEL: @caller.sext( +; CHECK-LABEL: define i1 @caller.sext() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.sext(i32 100, i32 -120) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.sext(i32 300, i32 900) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -162,8 +165,9 @@ define i1 @caller.sext() { ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.fptosi(i32 %x) { -; CHECK-LABEL: @f.fptosi( -; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X:%.*]] to double +; CHECK-LABEL: define internal i1 @f.fptosi( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X]] to double ; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[TO_DOUBLE]] ; CHECK-NEXT: [[TO_I32:%.*]] = fptosi double [[ADD]] to i32 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i32 [[TO_I32]], 300 @@ -189,7 +193,7 @@ define internal i1 @f.fptosi(i32 %x) { } define i1 @caller.fptosi() { -; CHECK-LABEL: @caller.fptosi( +; CHECK-LABEL: define i1 @caller.fptosi() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fptosi(i32 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fptosi(i32 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -203,8 +207,9 @@ define i1 @caller.fptosi() { ; There's nothing we can do besides going to the full range or overdefined. 
define internal i1 @f.fpext(i16 %x) { -; CHECK-LABEL: @f.fpext( -; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X:%.*]] to float +; CHECK-LABEL: define internal i1 @f.fpext( +; CHECK-SAME: i16 [[X:%.*]]) { +; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X]] to float ; CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[TO_FLOAT]] to double ; CHECK-NEXT: [[TO_I64:%.*]] = fptoui float [[TO_FLOAT]] to i64 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 @@ -231,7 +236,7 @@ define internal i1 @f.fpext(i16 %x) { ; There's nothing we can do besides going to the full range or overdefined. define i1 @caller.fpext() { -; CHECK-LABEL: @caller.fpext( +; CHECK-LABEL: define i1 @caller.fpext() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fpext(i16 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fpext(i16 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -245,8 +250,9 @@ define i1 @caller.fpext() { ; There's nothing we can do besides going to the full range or overdefined. define internal i1 @f.inttoptr.ptrtoint(i64 %x) { -; CHECK-LABEL: @f.inttoptr.ptrtoint( -; CHECK-NEXT: [[TO_PTR:%.*]] = inttoptr i64 [[X:%.*]] to ptr +; CHECK-LABEL: define internal i1 @f.inttoptr.ptrtoint( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[TO_PTR:%.*]] = inttoptr i64 [[X]] to ptr ; CHECK-NEXT: [[TO_I64:%.*]] = ptrtoint ptr [[TO_PTR]] to i64 ; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 ; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TO_I64]], 299 @@ -270,7 +276,7 @@ define internal i1 @f.inttoptr.ptrtoint(i64 %x) { } define i1 @caller.inttoptr.ptrtoint() { -; CHECK-LABEL: @caller.inttoptr.ptrtoint( +; CHECK-LABEL: define i1 @caller.inttoptr.ptrtoint() { ; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 100) ; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 300) ; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] @@ -284,8 +290,9 @@ define i1 @caller.inttoptr.ptrtoint() { ; Make sure we do not create constant ranges for 
int to fp casts. define i1 @int_range_to_double_cast(i32 %a) { -; CHECK-LABEL: @int_range_to_double_cast( -; CHECK-NEXT: [[R:%.*]] = and i32 [[A:%.*]], 255 +; CHECK-LABEL: define i1 @int_range_to_double_cast( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], 255 ; CHECK-NEXT: [[T4:%.*]] = sitofp i32 [[R]] to double ; CHECK-NEXT: [[T10:%.*]] = fadd double 0.000000e+00, [[T4]] ; CHECK-NEXT: [[T11:%.*]] = fcmp olt double [[T4]], [[T10]] @@ -300,7 +307,7 @@ define i1 @int_range_to_double_cast(i32 %a) { ; Make sure we do not use ranges to propagate info from vectors. define i16 @vector_binop_and_cast() { -; CHECK-LABEL: @vector_binop_and_cast( +; CHECK-LABEL: define i16 @vector_binop_and_cast() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x i16> , i16 undef, i32 0 ; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> , [[VECINIT7]] @@ -317,8 +324,9 @@ entry: } define internal i64 @f.sext_to_zext(i32 %t) { -; CHECK-LABEL: @f.sext_to_zext( -; CHECK-NEXT: [[A:%.*]] = zext nneg i32 [[T:%.*]] to i64 +; CHECK-LABEL: define internal range(i64 0, 2) i64 @f.sext_to_zext( +; CHECK-SAME: i32 [[T:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = zext nneg i32 [[T]] to i64 ; CHECK-NEXT: ret i64 [[A]] ; %a = sext i32 %t to i64 @@ -326,10 +334,11 @@ define internal i64 @f.sext_to_zext(i32 %t) { } define i64 @caller.sext_to_zext(i32 %i) { -; CHECK-LABEL: @caller.sext_to_zext( -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[I:%.*]], 9 +; CHECK-LABEL: define range(i64 0, 2) i64 @caller.sext_to_zext( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[I]], 9 ; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: [[T:%.*]] = call i64 @f.sext_to_zext(i32 [[CONV]]), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[T:%.*]] = call i64 @f.sext_to_zext(i32 [[CONV]]) ; CHECK-NEXT: ret i64 [[T]] ; %cmp = icmp sle i32 %i, 9 diff --git a/llvm/test/Transforms/SCCP/ipsccp-basic.ll b/llvm/test/Transforms/SCCP/ipsccp-basic.ll index 
71c042b9b29467..6a7ab8ac2864cd 100644 --- a/llvm/test/Transforms/SCCP/ipsccp-basic.ll +++ b/llvm/test/Transforms/SCCP/ipsccp-basic.ll @@ -71,7 +71,7 @@ define void @test3a() { } define i32 @test3b() { -; CHECK-LABEL: define i32 @test3b() { +; CHECK-LABEL: define range(i32 0, 18) i32 @test3b() { ; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V]], 17 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -105,7 +105,7 @@ define internal {i64,i64} @test4a() { } define i64 @test4b() personality ptr @__gxx_personality_v0 { -; CHECK-LABEL: define i64 @test4b() personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: define range(i64 0, 6) i64 @test4b() personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: [[A:%.*]] = invoke { i64, i64 } @test4a() ; CHECK-NEXT: to label [[A:%.*]] unwind label [[B:%.*]] ; CHECK: A: @@ -149,7 +149,7 @@ define internal {i64,i64} @test5a() { } define i64 @test5b() personality ptr @__gxx_personality_v0 { -; CHECK-LABEL: define i64 @test5b() personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: define range(i64 0, 6) i64 @test5b() personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: [[A:%.*]] = invoke { i64, i64 } @test5a() ; CHECK-NEXT: to label [[A:%.*]] unwind label [[B:%.*]] ; CHECK: A: diff --git a/llvm/test/Transforms/SCCP/switch.ll b/llvm/test/Transforms/SCCP/switch.ll index 306f0eebf2b408..5208213de210c1 100644 --- a/llvm/test/Transforms/SCCP/switch.ll +++ b/llvm/test/Transforms/SCCP/switch.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -S -passes=ipsccp < %s | FileCheck %s ; Make sure we always consider the default edge executable for a switch @@ -7,7 +7,7 @@ declare void @foo() declare i32 @g(i32) define void @test1() { -; CHECK-LABEL: @test1( +; CHECK-LABEL: define void @test1() { ; CHECK-NEXT: switch i32 undef, label [[D:%.*]] 
[ ; CHECK-NEXT: ] ; CHECK: d: @@ -21,15 +21,16 @@ d: } define i32 @test_duplicate_successors_phi(i1 %c, i32 %x) { -; CHECK-LABEL: @test_duplicate_successors_phi( +; CHECK-LABEL: define i32 @test_duplicate_successors_phi( +; CHECK-SAME: i1 [[C:%.*]], i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[SWITCH:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[SWITCH:%.*]], label [[END:%.*]] ; CHECK: switch: ; CHECK-NEXT: br label [[SWITCH_DEFAULT:%.*]] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 ; CHECK: end: -; CHECK-NEXT: ret i32 [[X:%.*]] +; CHECK-NEXT: ret i32 [[X]] ; entry: br i1 %c, label %switch, label %end @@ -49,13 +50,14 @@ end: } define i32 @test_duplicate_successors_phi_2(i1 %c, i32 %x) { -; CHECK-LABEL: @test_duplicate_successors_phi_2( +; CHECK-LABEL: define i32 @test_duplicate_successors_phi_2( +; CHECK-SAME: i1 [[C:%.*]], i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[SWITCH:%.*]], label [[END:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[SWITCH:%.*]], label [[END:%.*]] ; CHECK: switch: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ 1, [[SWITCH]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ 1, [[SWITCH]] ] ; CHECK-NEXT: ret i32 [[PHI]] ; entry: @@ -76,22 +78,23 @@ end: } define i32 @test_duplicate_successors_phi_3(i1 %c1, ptr %p, i32 %y) { -; CHECK-LABEL: @test_duplicate_successors_phi_3( +; CHECK-LABEL: define i32 @test_duplicate_successors_phi_3( +; CHECK-SAME: i1 [[C1:%.*]], ptr [[P:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C1:%.*]], label [[SWITCH:%.*]], label [[SWITCH_1:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[SWITCH:%.*]], label [[SWITCH_1:%.*]] ; CHECK: switch: -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: switch i32 [[X]], label 
[[SWITCH_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[SWITCH_DEFAULT]] -; CHECK-NEXT: i32 1, label [[SWITCH_0:%.*]] -; CHECK-NEXT: i32 2, label [[SWITCH_0]] +; CHECK-NEXT: i32 0, label [[SWITCH_DEFAULT]] +; CHECK-NEXT: i32 1, label [[SWITCH_0:%.*]] +; CHECK-NEXT: i32 2, label [[SWITCH_0]] ; CHECK-NEXT: ] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 ; CHECK: switch.0: ; CHECK-NEXT: ret i32 0 ; CHECK: switch.1: -; CHECK-NEXT: ret i32 [[Y:%.*]] +; CHECK-NEXT: ret i32 [[Y]] ; entry: br i1 %c1, label %switch, label %switch.1 @@ -118,12 +121,13 @@ switch.1: } define i32 @test_local_range(ptr %p) { -; CHECK-LABEL: @test_local_range( -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 3) i32 @test_local_range( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ -; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] -; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] -; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] +; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] +; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] +; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] ; CHECK-NEXT: ] ; CHECK: default.unreachable: ; CHECK-NEXT: unreachable @@ -160,13 +164,14 @@ switch.3: ; TODO: Determine that case i3 is dead, even though the edge is shared? 
define i32 @test_duplicate_successors(ptr %p) { -; CHECK-LABEL: @test_duplicate_successors( -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-LABEL: define range(i32 0, 2) i32 @test_duplicate_successors( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ -; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] -; CHECK-NEXT: i32 1, label [[SWITCH_0]] -; CHECK-NEXT: i32 2, label [[SWITCH_1:%.*]] -; CHECK-NEXT: i32 3, label [[SWITCH_1]] +; CHECK-NEXT: i32 0, label [[SWITCH_0:%.*]] +; CHECK-NEXT: i32 1, label [[SWITCH_0]] +; CHECK-NEXT: i32 2, label [[SWITCH_1:%.*]] +; CHECK-NEXT: i32 3, label [[SWITCH_1]] ; CHECK-NEXT: ] ; CHECK: default.unreachable: ; CHECK-NEXT: unreachable @@ -201,11 +206,12 @@ switch.2: ; Case i32 2 is dead as well, but this cannot be determined based on ; range information. define internal i32 @test_ip_range(i32 %x) { -; CHECK-LABEL: @test_ip_range( -; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_UNREACHABLE:%.*]] [ -; CHECK-NEXT: i32 3, label [[SWITCH_3:%.*]] -; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] -; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] +; CHECK-LABEL: define internal range(i32 1, 4) i32 @test_ip_range( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ +; CHECK-NEXT: i32 3, label [[SWITCH_3:%.*]] +; CHECK-NEXT: i32 1, label [[SWITCH_1:%.*]] +; CHECK-NEXT: i32 2, label [[SWITCH_2:%.*]] ; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]] ; CHECK: default.unreachable: ; CHECK-NEXT: unreachable @@ -240,9 +246,9 @@ switch.3: } define void @call_test_ip_range() { -; CHECK-LABEL: @call_test_ip_range( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @test_ip_range(i32 1), !range [[RNG2:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @test_ip_range(i32 3), !range [[RNG2]] +; CHECK-LABEL: define void @call_test_ip_range() { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 
@test_ip_range(i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @test_ip_range(i32 3) ; CHECK-NEXT: ret void ; call i32 @test_ip_range(i32 1) @@ -251,11 +257,12 @@ define void @call_test_ip_range() { } define i32 @test_switch_range_may_include_undef(i1 %c.1, i1 %c.2, i32 %x) { -; CHECK-LABEL: @test_switch_range_may_include_undef( +; CHECK-LABEL: define range(i32 -1, 21) i32 @test_switch_range_may_include_undef( +; CHECK-SAME: i1 [[C_1:%.*]], i1 [[C_2:%.*]], i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]] ; CHECK: then.1: -; CHECK-NEXT: br i1 [[C_2:%.*]], label [[SWITCH:%.*]], label [[ELSE_2:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[SWITCH:%.*]], label [[ELSE_2:%.*]] ; CHECK: else.1: ; CHECK-NEXT: br label [[SWITCH]] ; CHECK: else.2: @@ -263,8 +270,8 @@ define i32 @test_switch_range_may_include_undef(i1 %c.1, i1 %c.2, i32 %x) { ; CHECK: switch: ; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[THEN_1]] ], [ 2, [[ELSE_1]] ], [ undef, [[ELSE_2]] ] ; CHECK-NEXT: switch i32 [[P]], label [[SWITCH_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[END_1:%.*]] -; CHECK-NEXT: i32 3, label [[END_2:%.*]] +; CHECK-NEXT: i32 0, label [[END_1:%.*]] +; CHECK-NEXT: i32 3, label [[END_2:%.*]] ; CHECK-NEXT: ] ; CHECK: switch.default: ; CHECK-NEXT: ret i32 -1 @@ -303,9 +310,10 @@ end.2: } define i32 @test_default_unreachable_by_dom_cond(i32 %x) { -; CHECK-LABEL: @test_default_unreachable_by_dom_cond( +; CHECK-LABEL: define i32 @test_default_unreachable_by_dom_cond( +; CHECK-SAME: i32 [[X:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 4 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X]], 4 ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT_UNREACHABLE:%.*]] [ @@ -371,4 +379,7 @@ return: declare void @llvm.assume(i1) -; CHECK: !1 = 
!{!"branch_weights", i32 1, i32 5, i32 3, i32 4} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 3} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 5, i32 3, i32 4} +;. diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll index fc3e56011d46cd..d3bac0d68a979f 100644 --- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll @@ -16,7 +16,7 @@ entry: } define i8 @range_from_or_nsw(i16 %a) { -; CHECK-LABEL: define i8 @range_from_or_nsw( +; CHECK-LABEL: define range(i8 -128, 0) i8 @range_from_or_nsw( ; CHECK-SAME: i16 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND1:%.*]] = or i16 [[A]], -128 @@ -30,7 +30,7 @@ entry: } define i16 @range_from_and_nuw_nsw(i32 %a) { -; CHECK-LABEL: define i16 @range_from_and_nuw_nsw( +; CHECK-LABEL: define range(i16 0, -32768) i16 @range_from_and_nuw_nsw( ; CHECK-SAME: i32 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], 32767 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll new file mode 100644 index 00000000000000..5ec6b4f1040d81 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-icmp-signed-op.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s + +define i32 @test(ptr %f, i16 %0) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[F:%.*]], i16 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[F]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> , i16 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> , i16 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 
x i16> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[ZEXT_4:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: ret i32 [[ZEXT_4]] +; +entry: + %1 = load i16, ptr %f, align 2 + + %zext.0 = zext i16 %1 to i32 + %sext.0 = sext i16 %0 to i32 + + %zext.1 = zext i16 0 to i32 + %sext.1 = sext i16 0 to i32 + %zext.2 = zext i16 0 to i32 + %sext.2 = sext i16 0 to i32 + %zext.3 = zext i16 0 to i32 + %sext.3 = sext i16 0 to i32 + + %cmp.0 = icmp ule i32 %zext.0, %sext.0 + %cmp.1 = icmp ule i32 %zext.1, %sext.1 + %cmp.2 = icmp ule i32 %zext.2, %sext.2 + %cmp.3 = icmp ule i32 %zext.3, %sext.3 + + %and.0 = and i1 %cmp.0, %cmp.1 + %and.1 = and i1 %and.0, %cmp.2 + %and.2 = and i1 %and.1, %cmp.3 + + %zext.4 = zext i1 %and.2 to i32 + + ret i32 %zext.4 +} diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll index 757340527ec030..ef2d3219cca9b6 100644 --- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll +++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll @@ -627,7 +627,233 @@ else: ret void } +define i32 @test_assume_false(i32 %cond) { +; CHECK-LABEL: @test_assume_false( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + 
br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %bool = phi i1 [ false, %default ], [ true, %case0 ], [ true, %case1 ], [ true, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 %bool) + ret i32 %res +} + +define i32 @test_assume_undef(i32 %cond) { +; CHECK-LABEL: @test_assume_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %bool = phi i1 [ undef, %default ], [ true, %case0 ], [ true, %case1 ], [ true, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 %bool) + ret i32 %res +} + +define i32 @test_assume_var(i32 %cond, i1 %var) { +; CHECK-LABEL: @test_assume_var( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[BOOL:%.*]] = phi i1 [ [[VAR:%.*]], [[DEFAULT]] ], [ true, [[CASE1]] 
], [ true, [[CASE2]] ], [ true, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[DEFAULT]] ], [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 [[BOOL]]) +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %bool = phi i1 [ %var, %default ], [ true, %case0 ], [ true, %case1 ], [ true, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 %bool) + ret i32 %res +} + +define i32 @test_assume_bundle_nonnull(i32 %cond, ptr nonnull %p) { +; CHECK-LABEL: @test_assume_bundle_nonnull( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ null, [[DEFAULT]] ], [ [[P:%.*]], [[CASE1]] ], [ [[P]], [[CASE2]] ], [ [[P]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[DEFAULT]] ], [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR]]) ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %ptr = phi ptr [ null, %default ], [ %p, %case0 ], [ %p, %case1 ], [ %p, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 true) [ 
"nonnull"(ptr %ptr) ] + ret i32 %res +} + +define i32 @test_assume_bundle_align(i32 %cond, ptr nonnull %p) { +; CHECK-LABEL: @test_assume_bundle_align( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[COND:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[EXIT:%.*]] +; CHECK-NEXT: i32 1, label [[CASE1:%.*]] +; CHECK-NEXT: i32 2, label [[CASE2:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: default: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ null, [[DEFAULT]] ], [ [[P:%.*]], [[CASE1]] ], [ [[P]], [[CASE2]] ], [ [[P]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[DEFAULT]] ], [ 2, [[CASE1]] ], [ 3, [[CASE2]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i32 8) ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %cond, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + br label %exit + +case1: + br label %exit + +case2: + br label %exit + +default: + br label %exit + +exit: + %ptr = phi ptr [ null, %default ], [ %p, %case0 ], [ %p, %case1 ], [ %p, %case2 ] + %res = phi i32 [ 0, %default ], [ 1, %case0 ], [ 2, %case1 ], [ 3, %case2 ] + call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i32 8) ] + ret i32 %res +} attributes #0 = { null_pointer_is_valid } ;. 
diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index b8a33f74bd570f..5503f7343cb672 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -434,8 +434,10 @@ ld_plugin_status onload(ld_plugin_tv *tv) { // FIXME: When binutils 2.31 (containing gold 1.16) is the minimum // required version, this should be changed to: // get_wrap_symbols = tv->tv_u.tv_get_wrap_symbols; - get_wrap_symbols = - (ld_plugin_get_wrap_symbols)tv->tv_u.tv_message; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" + get_wrap_symbols = (ld_plugin_get_wrap_symbols)tv->tv_u.tv_message; +#pragma GCC diagnostic pop break; default: break; diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 80064ed9848517..1f183975d9481f 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -133,9 +133,7 @@ getInterfaceFile(const StringRef Filename, bool ResetBanner = true) { std::unique_ptr IF; switch (identify_magic(Buffer->getBuffer())) { case file_magic::macho_dynamically_linked_shared_lib: - LLVM_FALLTHROUGH; case file_magic::macho_dynamically_linked_shared_lib_stub: - LLVM_FALLTHROUGH; case file_magic::macho_universal_binary: IF = ExitOnErr(DylibReader::get(Buffer->getMemBufferRef())); break; diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index 3fa4b2cf73b6be..dddd2f73d4446b 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -81,6 +81,7 @@ TEST_F(IntrinsicsTest, InstrProfInheritance) { __ISA(InstrProfCoverInst, InstrProfCntrInstBase); __ISA(InstrProfIncrementInst, InstrProfCntrInstBase); __ISA(InstrProfIncrementInstStep, InstrProfIncrementInst); + __ISA(InstrProfCallsite, InstrProfCntrInstBase); __ISA(InstrProfTimestampInst, InstrProfCntrInstBase); __ISA(InstrProfValueProfileInst, InstrProfCntrInstBase); 
__ISA(InstrProfMCDCBitmapInstBase, InstrProfInstBase); @@ -94,6 +95,7 @@ TEST_F(IntrinsicsTest, InstrProfInheritance) { {Intrinsic::instrprof_cover, isInstrProfCoverInst}, {Intrinsic::instrprof_increment, isInstrProfIncrementInst}, {Intrinsic::instrprof_increment_step, isInstrProfIncrementInstStep}, + {Intrinsic::instrprof_callsite, isInstrProfCallsite}, {Intrinsic::instrprof_mcdc_condbitmap_update, isInstrProfMCDCCondBitmapUpdate}, {Intrinsic::instrprof_mcdc_parameters, diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 7e00a80cacf933..98dacd3511e1d8 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -122,14 +122,6 @@ MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") { return false; } -MemProfSchema getFullSchema() { - MemProfSchema Schema; -#define MIBEntryDef(NameTag, Name, Type) Schema.push_back(Meta::Name); -#include "llvm/ProfileData/MIBEntryDef.inc" -#undef MIBEntryDef - return Schema; -} - TEST(MemProf, FillsValue) { std::unique_ptr Symbolizer(new MockSymbolizer()); @@ -187,7 +179,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for foo. const llvm::GlobalValue::GUID FooId = IndexedMemProfRecord::getGUID("foo"); - ASSERT_EQ(Records.count(FooId), 1U); + ASSERT_TRUE(Records.contains(FooId)); const MemProfRecord &Foo = Records[FooId]; ASSERT_THAT(Foo.AllocSites, SizeIs(1)); EXPECT_EQ(Foo.AllocSites[0].Info.getAllocCount(), 1U); @@ -203,7 +195,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for bar. const llvm::GlobalValue::GUID BarId = IndexedMemProfRecord::getGUID("bar"); - ASSERT_EQ(Records.count(BarId), 1U); + ASSERT_TRUE(Records.contains(BarId)); const MemProfRecord &Bar = Records[BarId]; ASSERT_THAT(Bar.AllocSites, SizeIs(1)); EXPECT_EQ(Bar.AllocSites[0].Info.getAllocCount(), 1U); @@ -223,7 +215,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for xyz. 
const llvm::GlobalValue::GUID XyzId = IndexedMemProfRecord::getGUID("xyz"); - ASSERT_EQ(Records.count(XyzId), 1U); + ASSERT_TRUE(Records.contains(XyzId)); const MemProfRecord &Xyz = Records[XyzId]; ASSERT_THAT(Xyz.CallSites, SizeIs(1)); ASSERT_THAT(Xyz.CallSites[0], SizeIs(2)); @@ -234,7 +226,7 @@ TEST(MemProf, FillsValue) { // Check the memprof record for abc. const llvm::GlobalValue::GUID AbcId = IndexedMemProfRecord::getGUID("abc"); - ASSERT_EQ(Records.count(AbcId), 1U); + ASSERT_TRUE(Records.contains(AbcId)); const MemProfRecord &Abc = Records[AbcId]; EXPECT_TRUE(Abc.AllocSites.empty()); ASSERT_THAT(Abc.CallSites, SizeIs(1)); @@ -248,7 +240,7 @@ TEST(MemProf, PortableWrapper) { /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, /*dealloc_cpu=*/4); - const auto Schema = getFullSchema(); + const auto Schema = llvm::memprof::getFullSchema(); PortableMemInfoBlock WriteBlock(Info); std::string Buffer; @@ -271,7 +263,7 @@ TEST(MemProf, PortableWrapper) { // Version0 and Version1 serialize IndexedMemProfRecord in the same format, so // we share one test. 
TEST(MemProf, RecordSerializationRoundTripVersion0And1) { - const MemProfSchema Schema = getFullSchema(); + const auto Schema = llvm::memprof::getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, @@ -305,7 +297,7 @@ TEST(MemProf, RecordSerializationRoundTripVersion0And1) { } TEST(MemProf, RecordSerializationRoundTripVerion2) { - const MemProfSchema Schema = getFullSchema(); + const auto Schema = llvm::memprof::getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis index 0fc50f79466a45..d83ae8013c51e2 100644 --- a/llvm/utils/LLVMVisualizers/llvm.natvis +++ b/llvm/utils/LLVMVisualizers/llvm.natvis @@ -92,11 +92,11 @@ For later versions of Visual Studio, no setup is required. {($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask)} - {($T3)((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)} - {$T5::IntMask}: {($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask)} [{($T3)((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)}] + {((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)} + {$T5::IntMask}: {($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask)} [{((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask)}] ($T1)(*(intptr_t *)Value.Data & $T5::PointerBitMask) - ($T3)((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask) + ((*(intptr_t *)Value.Data >> $T5::IntShift) & $T5::IntMask) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 7a5d2be3ae95b2..88d353e89a4614 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -4246,7 +4246,7 @@ static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) { void CodeGenDAGPatterns::ParseOnePattern( Record 
*TheDef, TreePattern &Pattern, TreePattern &Result, - const std::vector &InstImpResults) { + const std::vector &InstImpResults, bool ShouldIgnore) { // Inline pattern fragments and expand multiple alternatives. Pattern.InlinePatternFragments(); @@ -4332,7 +4332,7 @@ void CodeGenDAGPatterns::ParseOnePattern( AddPatternToMatch(&Pattern, PatternToMatch(TheDef, Preds, T, Temp.getOnlyTree(), InstImpResults, Complexity, - TheDef->getID())); + TheDef->getID(), ShouldIgnore)); } } else { // Show a message about a dropped pattern with some info to make it @@ -4378,7 +4378,8 @@ void CodeGenDAGPatterns::ParsePatterns() { FindPatternInputsAndOutputs(Pattern, Pattern.getTree(j), InstInputs, InstResults, InstImpResults); - ParseOnePattern(CurPattern, Pattern, Result, InstImpResults); + ParseOnePattern(CurPattern, Pattern, Result, InstImpResults, + CurPattern->getValueAsBit("GISelShouldIgnore")); } } @@ -4407,10 +4408,10 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() { return; } - PatternsToMatch.emplace_back(P.getSrcRecord(), P.getPredicates(), - std::move(NewSrc), std::move(NewDst), - P.getDstRegs(), P.getAddedComplexity(), - Record::getNewUID(Records), Check); + PatternsToMatch.emplace_back( + P.getSrcRecord(), P.getPredicates(), std::move(NewSrc), + std::move(NewDst), P.getDstRegs(), P.getAddedComplexity(), + Record::getNewUID(Records), P.getGISelShouldIgnore(), Check); }; for (PatternToMatch &P : Copy) { @@ -4781,6 +4782,7 @@ void CodeGenDAGPatterns::GenerateVariants() { Variant, PatternsToMatch[i].getDstPatternShared(), PatternsToMatch[i].getDstRegs(), PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(Records), + PatternsToMatch[i].getGISelShouldIgnore(), PatternsToMatch[i].getHwModeFeatures()); } diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h index 7fcd39a9e940cc..7f94db0b7d5d76 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.h @@ 
-1057,17 +1057,19 @@ class PatternToMatch { TreePatternNodePtr DstPattern; // Resulting pattern. std::vector Dstregs; // Physical register defs being matched. std::string HwModeFeatures; - int AddedComplexity; // Add to matching pattern complexity. - unsigned ID; // Unique ID for the record. + int AddedComplexity; // Add to matching pattern complexity. + bool GISelShouldIgnore; // Should GlobalISel ignore importing this pattern. + unsigned ID; // Unique ID for the record. public: PatternToMatch(Record *srcrecord, ListInit *preds, TreePatternNodePtr src, TreePatternNodePtr dst, std::vector dstregs, - int complexity, unsigned uid, const Twine &hwmodefeatures = "") + int complexity, unsigned uid, bool ignore, + const Twine &hwmodefeatures = "") : SrcRecord(srcrecord), Predicates(preds), SrcPattern(src), DstPattern(dst), Dstregs(std::move(dstregs)), HwModeFeatures(hwmodefeatures.str()), AddedComplexity(complexity), - ID(uid) {} + GISelShouldIgnore(ignore), ID(uid) {} Record *getSrcRecord() const { return SrcRecord; } ListInit *getPredicates() const { return Predicates; } @@ -1078,6 +1080,7 @@ class PatternToMatch { const std::vector &getDstRegs() const { return Dstregs; } StringRef getHwModeFeatures() const { return HwModeFeatures; } int getAddedComplexity() const { return AddedComplexity; } + bool getGISelShouldIgnore() const { return GISelShouldIgnore; } unsigned getID() const { return ID; } std::string getPredicateCheck() const; @@ -1240,7 +1243,8 @@ class CodeGenDAGPatterns { void ParseOnePattern(Record *TheDef, TreePattern &Pattern, TreePattern &Result, - const std::vector &InstImpResults); + const std::vector &InstImpResults, + bool ShouldIgnore = false); void AddPatternToMatch(TreePattern *Pattern, PatternToMatch &&PTM); void FindPatternInputsAndOutputs( TreePattern &I, TreePatternNodePtr Pat, diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index f2504775d557f2..0439df8067ede8 100644 --- 
a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -97,7 +97,7 @@ static ParameterKind getParameterKind(const Record *R) { if (R->getValueAsInt("isHalfOrFloat") || R->getValueAsInt("isI16OrI32")) { return ParameterKind::Overload; } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: llvm_unreachable("Support for specified DXIL Type not yet implemented"); } @@ -272,7 +272,7 @@ static std::string getOverloadKindStr(const Record *R) { return "OverloadKind::I16 | OverloadKind::I32"; } } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: llvm_unreachable( "Support for specified parameter OverloadKind not yet implemented"); diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 25e302ce1ca46f..78abf80e7aecb3 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2411,6 +2411,8 @@ void GlobalISelEmitter::run(raw_ostream &OS) { for (const PatternToMatch &Pat : CGP.ptms()) { ++NumPatternTotal; + if (Pat.getGISelShouldIgnore()) + continue; // skip without warning auto MatcherOrErr = runOnPattern(Pat); // The pattern analysis can fail, indicating an unsupported pattern. diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp index 26034e31ad8d19..217b531dcfd394 100644 --- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This tablegen backend emits the include file needed by the target -// parser to parse the RISC-V CPUs. +// This tablegen backend emits the include file needed by RISCVTargetParser.cpp +// and RISCVISAInfo.cpp to parse the RISC-V CPUs and extensions. 
// //===----------------------------------------------------------------------===// @@ -17,6 +17,94 @@ using namespace llvm; +static StringRef getExtensionName(const Record *R) { + StringRef Name = R->getValueAsString("Name"); + Name.consume_front("experimental-"); + return Name; +} + +static void printExtensionTable(raw_ostream &OS, + const std::vector &Extensions, + bool Experimental) { + OS << "static const RISCVSupportedExtension Supported"; + if (Experimental) + OS << "Experimental"; + OS << "Extensions[] = {\n"; + + for (Record *R : Extensions) { + if (R->getValueAsBit("Experimental") != Experimental) + continue; + + OS << " {\"" << getExtensionName(R) << "\", {" + << R->getValueAsInt("MajorVersion") << ", " + << R->getValueAsInt("MinorVersion") << "}},\n"; + } + + OS << "};\n\n"; +} + +// Get the extension name from the Record name. This gives the canonical +// capitalization. +static StringRef getExtensionNameFromRecordName(const Record *R) { + StringRef Name = R->getName(); + if (!Name.consume_front("FeatureStdExt")) + Name.consume_front("FeatureVendor"); + + return Name; +} + +static void emitRISCVExtensions(RecordKeeper &Records, raw_ostream &OS) { + OS << "#ifdef GET_SUPPORTED_EXTENSIONS\n"; + OS << "#undef GET_SUPPORTED_EXTENSIONS\n\n"; + + std::vector Extensions = + Records.getAllDerivedDefinitions("RISCVExtension"); + llvm::sort(Extensions, [](const Record *Rec1, const Record *Rec2) { + return getExtensionName(Rec1) < getExtensionName(Rec2); + }); + + printExtensionTable(OS, Extensions, /*Experimental=*/false); + printExtensionTable(OS, Extensions, /*Experimental=*/true); + + OS << "#endif // GET_SUPPORTED_EXTENSIONS\n\n"; + + OS << "#ifdef GET_IMPLIED_EXTENSIONS\n"; + OS << "#undef GET_IMPLIED_EXTENSIONS\n\n"; + + for (Record *Ext : Extensions) { + auto ImpliesList = Ext->getValueAsListOfDefs("Implies"); + if (ImpliesList.empty()) + continue; + + OS << "static const char *ImpliedExts" + << getExtensionNameFromRecordName(Ext) << "[] = {"; + + 
ListSeparator LS(", "); + for (auto *ImpliedExt : ImpliesList) { + if (!ImpliedExt->isSubClassOf("RISCVExtension")) + continue; + + OS << LS << '"' << getExtensionName(ImpliedExt) << '"'; + } + + OS << "};\n"; + } + + OS << "\nstatic constexpr ImpliedExtsEntry ImpliedExts[] = {\n"; + for (Record *Ext : Extensions) { + auto ImpliesList = Ext->getValueAsListOfDefs("Implies"); + if (ImpliesList.empty()) + continue; + + OS << " { {\"" << getExtensionName(Ext) << "\"}, {ImpliedExts" + << getExtensionNameFromRecordName(Ext) << "} },\n"; + } + + OS << "};\n\n"; + + OS << "#endif // GET_IMPLIED_EXTENSIONS\n\n"; +} + // We can generate march string from target features as what has been described // in RISC-V ISA specification (version 20191213) 'Chapter 27. ISA Extension // Naming Conventions'. @@ -54,7 +142,7 @@ static void printMArch(raw_ostream &OS, const Record &Rec) { OS << LS << Ext.first << Ext.second.Major << 'p' << Ext.second.Minor; } -static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { +static void emitRISCVProcs(RecordKeeper &RK, raw_ostream &OS) { OS << "#ifndef PROC\n" << "#define PROC(ENUM, NAME, DEFAULT_MARCH, FAST_UNALIGNED_ACCESS)\n" << "#endif\n\n"; @@ -101,5 +189,11 @@ static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { OS << "\n#undef TUNE_PROC\n"; } +static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { + emitRISCVExtensions(RK, OS); + emitRISCVProcs(RK, OS); +} + static TableGen::Emitter::Opt X("gen-riscv-target-def", EmitRISCVTargetDef, - "Generate the list of CPU for RISCV"); + "Generate the list of CPUs and extensions for " + "RISC-V"); diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn index c9e081383fa026..0d27b786da1f63 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/modernize/BUILD.gn @@ -25,6 +25,7 
@@ static_library("modernize") { "MakeSharedCheck.cpp", "MakeSmartPtrCheck.cpp", "MakeUniqueCheck.cpp", + "MinMaxUseInitializerListCheck.cpp", "ModernizeTidyModule.cpp", "PassByValueCheck.cpp", "RawStringLiteralCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 59dc38c8c4d8a8..815c5a93c72f75 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -35,6 +35,7 @@ static_library("readability") { "IsolateDeclarationCheck.cpp", "MagicNumbersCheck.cpp", "MakeMemberFunctionConstCheck.cpp", + "MathMissingParenthesesCheck.cpp", "MisleadingIndentationCheck.cpp", "MisplacedArrayIndexCheck.cpp", "NamedParameterCheck.cpp", diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h b/mlir/include/mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h new file mode 100644 index 00000000000000..6c3643f7835cbe --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h @@ -0,0 +1,21 @@ +//===- RuntimeOpVerification.h - Op Verification ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_RUNTIMEOPVERIFICATION_H +#define MLIR_DIALECT_LINALG_RUNTIMEOPVERIFICATION_H + +namespace mlir { +class DialectRegistry; + +namespace linalg { +void registerRuntimeVerifiableOpInterfaceExternalModels( + DialectRegistry ®istry); +} // namespace linalg +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_RUNTIMEOPVERIFICATION_H diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h index 8a57c6094c41c0..030be328e97fd0 100644 --- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h +++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h @@ -194,7 +194,7 @@ bool isLinearizableVector(VectorType type); /// for each dimension of the passed in tensor. Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source, ArrayRef readShape, Value padValue, - bool useInBoundsInsteadOfMasking = true); + bool useInBoundsInsteadOfMasking); /// Returns success if `inputVectorSizes` is a valid masking configuraion for /// given `shape`, i.e., it meets: diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index c4d788cf8ed316..d9db21073e15c7 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -45,6 +45,7 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Transforms/AllInterfaces.h" +#include "mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h" #include "mlir/Dialect/MLProgram/IR/MLProgram.h" #include "mlir/Dialect/MLProgram/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/MPI/IR/MPI.h" @@ -161,6 +162,7 @@ inline void registerAllDialects(DialectRegistry ®istry) { cf::registerBufferDeallocationOpInterfaceExternalModels(registry); 
gpu::registerBufferDeallocationOpInterfaceExternalModels(registry); linalg::registerAllDialectInterfaceImplementations(registry); + linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry); memref::registerAllocationOpInterfaceExternalModels(registry); memref::registerBufferViewFlowOpInterfaceExternalModels(registry); memref::registerRuntimeVerifiableOpInterfaceExternalModels(registry); diff --git a/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td b/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td index d5f11d00cc3d2a..6fd0df59d9d2e0 100644 --- a/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td +++ b/mlir/include/mlir/Interfaces/RuntimeVerifiableOpInterface.td @@ -35,6 +35,12 @@ def RuntimeVerifiableOpInterface : OpInterface<"RuntimeVerifiableOpInterface"> { "::mlir::Location":$loc) >, ]; + + let extraClassDeclaration = [{ + /// Generate the error message that will be printed to the user when + /// verification fails. + static std::string generateErrorMessage(Operation *op, const std::string &msg); + }]; } #endif // MLIR_INTERFACES_RUNTIMEVERIFIABLEOPINTERFACE diff --git a/mlir/include/mlir/Tools/lsp-server-support/Transport.h b/mlir/include/mlir/Tools/lsp-server-support/Transport.h index 44c71058cf717c..ce742be7a941c9 100644 --- a/mlir/include/mlir/Tools/lsp-server-support/Transport.h +++ b/mlir/include/mlir/Tools/lsp-server-support/Transport.h @@ -147,15 +147,9 @@ class MessageHandler { void (ThisT::*handler)(const Param &)) { notificationHandlers[method] = [method, handler, thisPtr](llvm::json::Value rawParams) { - llvm::Expected param = - parse(rawParams, method, "notification"); - if (!param) { - return llvm::consumeError( - llvm::handleErrors(param.takeError(), [](const LSPError &lspError) { - Logger::error("JSON parsing error: {0}", - lspError.message.c_str()); - })); - } + llvm::Expected param = parse(rawParams, method, "request"); + if (!param) + return llvm::consumeError(param.takeError()); 
(thisPtr->*handler)(*param); }; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index ee6e391d0cc682..3b5282a09569d7 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -27,6 +27,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms NamedOpConversions.cpp Padding.cpp Promotion.cpp + RuntimeOpVerification.cpp Specialize.cpp Split.cpp SplitReduction.cpp @@ -60,6 +61,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms MLIRFuncDialect MLIRFuncToLLVM MLIRFuncTransforms + MLIRIndexDialect MLIRInferTypeOpInterface MLIRIR MLIRMemRefDialect diff --git a/mlir/lib/Dialect/Linalg/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/Linalg/Transforms/RuntimeOpVerification.cpp new file mode 100644 index 00000000000000..b30182dc84079f --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/RuntimeOpVerification.cpp @@ -0,0 +1,135 @@ +//===- RuntimeOpVerification.cpp - Op Verification ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/RuntimeOpVerification.h" + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/Dialect/Index/IR/IndexAttrs.h" +#include "mlir/Dialect/Index/IR/IndexDialect.h" +#include "mlir/Dialect/Index/IR/IndexOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Interfaces/RuntimeVerifiableOpInterface.h" + +namespace mlir { +namespace linalg { +namespace { +/// Verify that the runtime sizes of the operands to linalg structured ops are +/// compatible with the runtime sizes inferred by composing the loop ranges with +/// the linalg op's indexing maps. This is similar to the verifier except that +/// here we insert IR to perform the verification at runtime. 
+template +struct StructuredOpInterface + : public RuntimeVerifiableOpInterface::ExternalModel< + StructuredOpInterface, T> { + void generateRuntimeVerification(Operation *op, OpBuilder &builder, + Location loc) const { + auto linalgOp = llvm::cast(op); + + SmallVector loopRanges = linalgOp.createLoopRanges(builder, loc); + auto [starts, ends, _] = getOffsetsSizesAndStrides(loopRanges); + + auto zero = builder.create(loc, 0); + auto one = builder.create(loc, 1); + + // Subtract one from the loop ends before composing with the indexing map + transform(ends, ends.begin(), [&](OpFoldResult end) { + auto endValue = getValueOrCreateConstantIndexOp(builder, loc, end); + return builder.createOrFold(loc, endValue, one); + }); + + for (OpOperand &opOperand : linalgOp->getOpOperands()) { + AffineMap indexingMap = linalgOp.getMatchingIndexingMap(&opOperand); + auto startIndices = affine::makeComposedFoldedMultiResultAffineApply( + builder, loc, indexingMap, starts); + auto endIndices = affine::makeComposedFoldedMultiResultAffineApply( + builder, loc, indexingMap, ends); + + for (auto dim : llvm::seq(linalgOp.getRank(&opOperand))) { + auto startIndex = + getValueOrCreateConstantIndexOp(builder, loc, startIndices[dim]); + auto endIndex = + getValueOrCreateConstantIndexOp(builder, loc, endIndices[dim]); + + // Generate: + // minIndex = min(startIndex, endIndex) + // assert(minIndex >= 0) + // To ensure we do not generate a negative index. 
We take the minimum of + // the start and end indices in order to handle reverse loops such as + // `affine_map<(i) -> (3 - i)>` + auto min = + builder.createOrFold(loc, startIndex, endIndex); + auto cmpOp = builder.createOrFold( + loc, index::IndexCmpPredicate::SGE, min, zero); + auto msg = RuntimeVerifiableOpInterface::generateErrorMessage( + linalgOp, "unexpected negative result on dimension #" + + std::to_string(dim) + " of input/output operand #" + + std::to_string(opOperand.getOperandNumber())); + builder.createOrFold(loc, cmpOp, msg); + + // Generate: + // inferredDimSize = max(startIndex, endIndex) + 1 + // actualDimSize = dim(operand) + // assert(inferredDimSize <= actualDimSize) + // To ensure that we do not index past the bounds of the operands. + auto max = + builder.createOrFold(loc, startIndex, endIndex); + + auto inferredDimSize = + builder.createOrFold(loc, max, one); + + auto actualDimSize = + createOrFoldDimOp(builder, loc, opOperand.get(), dim); + + // Similar to the verifier, when the affine expression in the indexing + // map is complicated, we just check that the inferred dimension sizes + // are in the boundary of the operands' size. Being more precise than + // that is difficult. + auto predicate = isa(indexingMap.getResult(dim)) + ? 
index::IndexCmpPredicate::EQ + : index::IndexCmpPredicate::SLE; + + cmpOp = builder.createOrFold( + loc, predicate, inferredDimSize, actualDimSize); + msg = RuntimeVerifiableOpInterface::generateErrorMessage( + linalgOp, "dimension #" + std::to_string(dim) + + " of input/output operand #" + + std::to_string(opOperand.getOperandNumber()) + + " is incompatible with inferred dimension size"); + builder.createOrFold(loc, cmpOp, msg); + } + } + } +}; + +template +void attachInterface(MLIRContext *ctx) { + (OpTs::template attachInterface>(*ctx), ...); +} +} // namespace +} // namespace linalg +} // namespace mlir + +void mlir::linalg::registerRuntimeVerifiableOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addExtension(+[](MLIRContext *ctx, LinalgDialect *) { + attachInterface< +#define GET_OP_LIST +#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" + >(ctx); + + // Load additional dialects of which ops may get created. + ctx->loadDialect(); + }); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index e836f0dc63b4f9..ef9a30be9a0153 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1499,11 +1499,11 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, // If the input vector sizes are not provided, then the vector sizes are // determined by the result tensor shape. In case the vector sizes aren't // provided, we update the inBounds attribute instead of masking. - bool useInBoundsInsteadOfMasking = true; + bool useInBoundsInsteadOfMasking = false; if (inputVectorSizes.empty()) { ArrayRef resultTensorShape = packOp.getDestType().getShape(); inputVectorSizes = resultTensorShape.take_front(packOp.getSourceRank()); - useInBoundsInsteadOfMasking = false; + useInBoundsInsteadOfMasking = true; } // Create masked TransferReadOp. 
@@ -1612,7 +1612,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp, // to shape of source, then a mask is necessary. Value readResult = vector::createReadOrMaskedRead( rewriter, loc, unpackOp.getSource(), - ArrayRef(readMaskShape.begin(), readMaskShape.end()), padValue); + ArrayRef(readMaskShape.begin(), readMaskShape.end()), padValue, + /*useInBoundsInsteadOfMasking=*/false); PackingMetadata packMetadata; SmallVector lastDimToInsertPosPerm = @@ -1669,7 +1670,8 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp, (void)status; // prevent unused variable warning on non-assert builds assert(succeeded(status) && "failed to reify result shapes"); auto maskedRead = vector::createReadOrMaskedRead( - rewriter, loc, padOp.getSource(), inputVectorSizes, padValue); + rewriter, loc, padOp.getSource(), inputVectorSizes, padValue, + /*useInBoundsInsteadOfMasking=*/false); Operation *write = createWriteOrMaskedWrite( rewriter, loc, maskedRead, reifiedReturnShapes[0], inputVectorSizes); newResults.push_back(write->getResult(0)); diff --git a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp index 05b813a3b1e908..450bfa0cec0c7f 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp @@ -20,25 +20,6 @@ using namespace mlir; -/// Generate an error message string for the given op and the specified error. -static std::string generateErrorMessage(Operation *op, const std::string &msg) { - std::string buffer; - llvm::raw_string_ostream stream(buffer); - OpPrintingFlags flags; - // We may generate a lot of error messages and so we need to ensure the - // printing is fast. 
- flags.elideLargeElementsAttrs(); - flags.printGenericOpForm(); - flags.skipRegions(); - flags.useLocalScope(); - stream << "ERROR: Runtime op verification failed\n"; - op->print(stream, flags); - stream << "\n^ " << msg; - stream << "\nLocation: "; - op->getLoc().print(stream); - return stream.str(); -} - namespace mlir { namespace memref { namespace { @@ -62,8 +43,10 @@ struct CastOpInterface builder.create(loc, resultType.getRank()); Value isSameRank = builder.create( loc, arith::CmpIPredicate::eq, srcRank, resultRank); - builder.create(loc, isSameRank, - generateErrorMessage(op, "rank mismatch")); + builder.create( + loc, isSameRank, + RuntimeVerifiableOpInterface::generateErrorMessage(op, + "rank mismatch")); } // Get source offset and strides. We do not have an op to get offsets and @@ -101,8 +84,8 @@ struct CastOpInterface loc, arith::CmpIPredicate::eq, srcDimSz, resultDimSz); builder.create( loc, isSameSz, - generateErrorMessage(op, "size mismatch of dim " + - std::to_string(it.index()))); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "size mismatch of dim " + std::to_string(it.index()))); } // Get result offset and strides. @@ -119,8 +102,10 @@ struct CastOpInterface builder.create(loc, resultOffset); Value isSameOffset = builder.create( loc, arith::CmpIPredicate::eq, srcOffset, resultOffsetVal); - builder.create(loc, isSameOffset, - generateErrorMessage(op, "offset mismatch")); + builder.create( + loc, isSameOffset, + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "offset mismatch")); } // Check strides. 
@@ -137,8 +122,8 @@ struct CastOpInterface loc, arith::CmpIPredicate::eq, srcStride, resultStrideVal); builder.create( loc, isSameStride, - generateErrorMessage(op, "stride mismatch of dim " + - std::to_string(it.index()))); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "stride mismatch of dim " + std::to_string(it.index()))); } } }; @@ -178,7 +163,9 @@ struct LoadStoreOpInterface : andOp; } builder.create( - loc, assertCond, generateErrorMessage(op, "out-of-bounds access")); + loc, assertCond, + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "out-of-bounds access")); } }; @@ -248,7 +235,7 @@ struct ReinterpretCastOpInterface builder.create( loc, assertCond, - generateErrorMessage( + RuntimeVerifiableOpInterface::generateErrorMessage( op, "result of reinterpret_cast is out-of-bounds of the base memref")); } @@ -293,8 +280,8 @@ struct SubViewOpInterface builder.create( loc, assertCond, - generateErrorMessage(op, - "subview is out-of-bounds of the base memref")); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "subview is out-of-bounds of the base memref")); } }; @@ -334,8 +321,9 @@ struct ExpandShapeOpInterface builder.create(loc, 0)); builder.create( loc, isModZero, - generateErrorMessage(op, "static result dims in reassoc group do not " - "divide src dim evenly")); + RuntimeVerifiableOpInterface::generateErrorMessage( + op, "static result dims in reassoc group do not " + "divide src dim evenly")); } } }; diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 3ff41ab22fbc42..5029ed4aa0387a 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -1609,6 +1609,9 @@ OpFoldResult ReshapeOp::fold(FoldAdaptor adaptor) { cst.has_value() && cst.value() == static_cast(id); continue; } + + dynamicNoop = false; + break; } if (dynamicNoop) diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp 
b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp index 2f21c50c63473b..ac576ed0b4f097 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp @@ -437,8 +437,10 @@ struct OneDimMultiReductionToTwoDim auto loc = multiReductionOp.getLoc(); auto srcVectorType = multiReductionOp.getSourceVectorType(); auto srcShape = srcVectorType.getShape(); - auto castedType = VectorType::get(ArrayRef{1, srcShape.back()}, - srcVectorType.getElementType()); + auto castedType = VectorType::get( + ArrayRef{1, srcShape.back()}, srcVectorType.getElementType(), + ArrayRef{false, srcVectorType.getScalableDims().back()}); + auto accType = VectorType::get(ArrayRef{1}, srcVectorType.getElementType()); assert(!llvm::isa(multiReductionOp.getDestType()) && @@ -455,10 +457,11 @@ struct OneDimMultiReductionToTwoDim loc, accType, multiReductionOp.getAcc()); Value castMask; if (maskableOp.isMasked()) { - auto maskType = llvm::cast(mask.getType()); - auto castMaskType = - VectorType::get(ArrayRef{1, maskType.getShape().back()}, - maskType.getElementType()); + auto maskType = llvm::cast(mask.getType()); + auto castMaskType = VectorType::get( + ArrayRef{1, maskType.getShape().back()}, + maskType.getElementType(), + ArrayRef{false, maskType.getScalableDims().back()}); castMask = rewriter.create(loc, castMaskType, mask); } diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp index fcaf1ec944b479..6727f3f461722b 100644 --- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp +++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp @@ -345,7 +345,7 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc, int64_t readRank = readShape.size(); auto zero = builder.create(loc, 0); SmallVector inBoundsVal(readRank, true); - if (!useInBoundsInsteadOfMasking) { + if (useInBoundsInsteadOfMasking) { // Update the inBounds attribute. 
for (unsigned i = 0; i < readRank; i++) inBoundsVal[i] = (sourceShape[i] == readShape[i]) && @@ -359,7 +359,7 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc, /*padding=*/padValue, /*inBounds=*/inBoundsVal); - if (llvm::equal(readShape, sourceShape) || !useInBoundsInsteadOfMasking) + if (llvm::equal(readShape, sourceShape) || useInBoundsInsteadOfMasking) return transferReadOp; SmallVector mixedSourceDims = tensor::getMixedSizes(builder, loc, source); diff --git a/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp b/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp index 9205d8d8c34a29..561e8d33868748 100644 --- a/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp +++ b/mlir/lib/Interfaces/RuntimeVerifiableOpInterface.cpp @@ -11,6 +11,27 @@ namespace mlir { class Location; class OpBuilder; + +/// Generate an error message string for the given op and the specified error. +std::string +RuntimeVerifiableOpInterface::generateErrorMessage(Operation *op, + const std::string &msg) { + std::string buffer; + llvm::raw_string_ostream stream(buffer); + OpPrintingFlags flags; + // We may generate a lot of error messages and so we need to ensure the + // printing is fast. + flags.elideLargeElementsAttrs(); + flags.printGenericOpForm(); + flags.skipRegions(); + flags.useLocalScope(); + stream << "ERROR: Runtime op verification failed\n"; + op->print(stream, flags); + stream << "\n^ " << msg; + stream << "\nLocation: "; + op->getLoc().print(stream); + return stream.str(); +} } // namespace mlir /// Include the definitions of the interface. 
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp index 339c5f3825165d..64dea35614c070 100644 --- a/mlir/lib/Tools/lsp-server-support/Transport.cpp +++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp @@ -51,12 +51,12 @@ class Reply { Reply::Reply(const llvm::json::Value &id, llvm::StringRef method, JSONTransport &transport, std::mutex &transportOutputMutex) - : method(method), id(id), transport(&transport), + : id(id), transport(&transport), transportOutputMutex(transportOutputMutex) {} Reply::Reply(Reply &&other) - : method(other.method), replied(other.replied.load()), - id(std::move(other.id)), transport(other.transport), + : replied(other.replied.load()), id(std::move(other.id)), + transport(other.transport), transportOutputMutex(other.transportOutputMutex) { other.transport = nullptr; } diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 24c7bdd9e1050e..4bfed475d44f60 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -1062,6 +1062,28 @@ module attributes { transform.target_tag = "start_here" } { return %result : tensor<10x18x15xf64> } + func.func @convolution_depthwise(%input: tensor<1x10x196x48xf32>, %filter: tensor<1x4x48xf32>) -> tensor<1x10x191x48xf32> { + %cst = arith.constant 0.0 : f32 + %empty = tensor.empty() : tensor<1x10x191x48xf32> + %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<1x10x191x48xf32>) -> tensor<1x10x191x48xf32> + // expected-remark @below {{convolution}} + // expected-remark @below {{batch dims 0}} + // expected-remark @below {{output image dims 1 : i64, 2 : i64}} + // expected-remark @below {{output channel dims}} + // expected-remark @below {{filter loop dims 4 : i64, 5 : i64}} + // expected-remark @below {{input channel dims}} + // expected-remark @below {{depth dims 3}} + // expected-remark @below {{strides 1 : i64, 
1 : i64}} + // expected-remark @below {{dilations 1 : i64, 1 : i64}} + %result = linalg.depthwise_conv_2d_nhwc_hwc { + dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins(%input, %filter : tensor<1x10x196x48xf32>, tensor<1x4x48xf32>) + outs(%fill : tensor<1x10x191x48xf32>) -> tensor<1x10x191x48xf32> + + return %result : tensor<1x10x191x48xf32> + } + func.func @convolution_multi_channel(%input: tensor<2x34x68x16xf32>, %filter: tensor<8x2x3x5x16x16xf32>) -> tensor<8x32x32x16xf32> { %cst = arith.constant 0.0 : f32 %empty = tensor.empty() : tensor<8x32x32x16xf32> diff --git a/mlir/test/Dialect/Linalg/runtime-verification.mlir b/mlir/test/Dialect/Linalg/runtime-verification.mlir new file mode 100644 index 00000000000000..a4f29d8457e589 --- /dev/null +++ b/mlir/test/Dialect/Linalg/runtime-verification.mlir @@ -0,0 +1,43 @@ +// RUN: mlir-opt %s -generate-runtime-verification | FileCheck %s + +// Most of the tests for linalg runtime-verification are implemented as integration tests. 
+ +#identity = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: @static_dims +func.func @static_dims(%arg0: tensor<5xf32>, %arg1: tensor<5xf32>) -> (tensor<5xf32>) { + // CHECK: %[[TRUE:.*]] = index.bool.constant true + // CHECK: cf.assert %[[TRUE]] + %result = tensor.empty() : tensor<5xf32> + %0 = linalg.generic { + indexing_maps = [#identity, #identity, #identity], + iterator_types = ["parallel"] + } ins(%arg0, %arg1 : tensor<5xf32>, tensor<5xf32>) + outs(%result : tensor<5xf32>) { + ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) : + %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32 + linalg.yield %tmp1 : f32 + } -> tensor<5xf32> + return %0 : tensor<5xf32> +} + +// ----- + +#map = affine_map<() -> ()> + +// CHECK-LABEL: @scalars +func.func @scalars(%arg0: tensor, %arg1: tensor) -> (tensor) { + // No runtime checks are required if the operands are all scalars + // CHECK-NOT: cf.assert + %result = tensor.empty() : tensor + %0 = linalg.generic { + indexing_maps = [#map, #map, #map], + iterator_types = [] + } ins(%arg0, %arg1 : tensor, tensor) + outs(%result : tensor) { + ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) : + %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32 + linalg.yield %tmp1 : f32 + } -> tensor + return %0 : tensor +} diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 751c57eacd7ae5..9a4dd2f3b5cc11 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -2431,6 +2431,15 @@ func.func @reshape_nofold_2d(%arg0 : tensor) -> tensor { return %reshape : tensor } +// ----- + +// CHECK-LABEL: @reshape_nofold_2d_ins +func.func @reshape_nofold_2d_ins(%arg0 : tensor, %arg1: index, %arg2: index) -> tensor { + %ds = tensor.from_elements %arg1, %arg2 : tensor<2xindex> + // CHECK: tensor.reshape + %reshape = tensor.reshape %arg0(%ds) : (tensor, tensor<2xindex>) -> tensor + return %reshape : tensor +} // ----- diff --git 
a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir index 22808aa7d6acc3..f70d23a1932297 100644 --- a/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-multi-reduction-lowering.mlir @@ -281,6 +281,23 @@ func.func private @scalable_dims(%A : vector<8x[4]x2xf32>, %B: vector<8x[4]xf32> // CHECK: %[[VAL_163:.*]] = vector.shape_cast %[[VAL_162]] : vector<[32]xf32> to vector<8x[4]xf32> // CHECK: return %[[VAL_163]] : vector<8x[4]xf32> +// Check that OneDimMultiReductionToTwoDim handles scalable dim +func.func @scalable_dim_1d(%A: vector<[4]xf32>, %B: f32, %C: vector<[4]xi1>) -> f32 { + %0 = vector.mask %C { vector.multi_reduction , %A, %B [0] : vector<[4]xf32> to f32 } : vector<[4]xi1> -> f32 + return %0 : f32 +} + +// CHECK-LABEL: func.func @scalable_dim_1d( +// CHECK-SAME: %[[ARG_0:.*]]: vector<[4]xf32>, +// CHECK-SAME: %[[ARG_1:.*]]: f32, +// CHECK-SAME: %[[ARG_2:.*]]: vector<[4]xi1>) -> f32 { +// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant dense<0.000000e+00> : vector<1xf32> +// CHECK: %[[VAL_2:.*]] = vector.mask %[[ARG_2]] { vector.reduction , %[[ARG_0]], %[[ARG_1]] : vector<[4]xf32> into f32 } : vector<[4]xi1> -> f32 +// CHECK: %[[VAL_3:.*]] = vector.insertelement %[[VAL_2]], %[[VAL_1]][%[[VAL_0]] : index] : vector<1xf32> +// CHECK: %[[VAL_4:.*]] = vector.extract %[[VAL_3]][0] : f32 from vector<1xf32> +// CHECK: return %[[VAL_4]] : f32 + module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) { %func_op = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.op<"func.func"> diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir new file mode 100644 index 
00000000000000..b05ef9422e5967 --- /dev/null +++ b/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir @@ -0,0 +1,298 @@ +// RUN: mlir-opt %s -generate-runtime-verification \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -convert-linalg-to-loops \ +// RUN: -expand-strided-metadata \ +// RUN: -lower-affine \ +// RUN: -convert-scf-to-cf \ +// RUN: -test-cf-assert \ +// RUN: -convert-index-to-llvm \ +// RUN: -finalize-memref-to-llvm \ +// RUN: -convert-func-to-llvm \ +// RUN: -reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils \ +// RUN: -shared-libs=%mlir_c_runner_utils 2>&1 | \ +// RUN: FileCheck %s + +func.func @main() { + %c5x = arith.constant dense<0.0> : tensor<5xf32> + %c4x = arith.constant dense<0.0> : tensor<4xf32> + %d5x = tensor.cast %c5x : tensor<5xf32> to tensor + %d4x = tensor.cast %c4x : tensor<4xf32> to tensor + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @simple_add(%d5x, %d5x) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + func.call @simple_add(%d5x, %d4x) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + func.call @simple_add(%d4x, %d5x) : (tensor, tensor) -> (tensor) + + %c1x1 = arith.constant dense<0.0> : tensor<1x1xf32> + %c1x4 = arith.constant dense<0.0> : tensor<1x4xf32> + %c4x4 = arith.constant dense<0.0> : tensor<4x4xf32> + %c4x5 = arith.constant dense<0.0> : tensor<4x5xf32> + %c5x4 = arith.constant dense<0.0> : tensor<5x4xf32> + %d1x1 = tensor.cast %c1x1 : tensor<1x1xf32> to tensor + %d1x4 = tensor.cast %c1x4 : tensor<1x4xf32> to tensor + %d4x4 = tensor.cast %c4x4 : tensor<4x4xf32> 
to tensor + %d4x5 = tensor.cast %c4x5 : tensor<4x5xf32> to tensor + %d5x4 = tensor.cast %c5x4 : tensor<5x4xf32> to tensor + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d1x1, %d1x1) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d1x1, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @broadcast_add(%d4x4, %d1x4) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + func.call @broadcast_add(%d1x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size + func.call @broadcast_add(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @matmul_generic(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size + func.call @matmul_generic(%d4x5, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @matmul_named(%d5x4, %d4x5) : (tensor, tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.matmul + // CHECK: ^ dimension #0 of input/output operand #1 is 
incompatible with inferred dimension size + func.call @matmul_named(%d4x5, %d4x5) : (tensor, tensor) -> (tensor) + + %c64x57 = arith.constant dense<0.0> : tensor<16x29xf32> + %c3x4 = arith.constant dense<0.0> : tensor<3x4xf32> + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @conv(%c64x57, %c3x4) : (tensor<16x29xf32>, tensor<3x4xf32>) -> (tensor<5x7xf32>) + + // CHECK-NOT: ERROR: Runtime op verification failed + func.call @reverse_from_3(%d4x) : (tensor) -> (tensor) + + // CHECK: ERROR: Runtime op verification failed + // CHECK: linalg.generic + // CHECK: unexpected negative result on dimension #0 of input/output operand #0 + func.call @reverse_from_3(%d5x) : (tensor) -> (tensor) + + return +} + + +#identity1D = affine_map<(d0) -> (d0)> + +func.func @simple_add(%arg0: tensor, %arg1: tensor) -> (tensor) { + %c0 = arith.constant 0 : index + %dim = tensor.dim %arg0, %c0 : tensor + %result = tensor.empty(%dim) : tensor + %0 = linalg.generic { + indexing_maps = [#identity1D, #identity1D, #identity1D], + iterator_types = ["parallel"] + } ins(%arg0, %arg1 : tensor, tensor) + outs(%result : tensor) { + ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) : + %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32 + linalg.yield %tmp1 : f32 + } -> tensor + return %0 : tensor +} + +#broadcastD0 = affine_map<(d0, d1) -> (0, d1)> +#broadcastD1 = affine_map<(d0, d1) -> (d0, 0)> +#identity2D = affine_map<(d0, d1) -> (d0, d1)> + +func.func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor { + // Calculate maximum dimension 0 + %c0 = arith.constant 0 : index + %dim = tensor.dim %arg0, %c0 : tensor + %dim_0 = tensor.dim %arg1, %c0 : tensor + %0 = arith.maxui %dim, %dim_0 : index + + // Calculate maximum dimension 1 + %c1 = arith.constant 1 : index + %dim_1 = tensor.dim %arg0, %c1 : tensor + %dim_2 = tensor.dim %arg1, %c1 : tensor + %1 = arith.maxui %dim_1, %dim_2 : index + + // Broadcast dimension 0 of %arg0 + %dim_3 = tensor.dim %arg0, %c0 : tensor + %2 = arith.cmpi eq, 
%dim_3, %c1 : index + %3 = scf.if %2 -> (tensor) { + %dim_7 = tensor.dim %arg0, %c1 : tensor + %12 = tensor.empty(%0, %dim_7) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD0, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%arg0 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %arg0 : tensor + } + + // Broadcast dimension 1 of %arg0 + %dim_4 = tensor.dim %3, %c1 : tensor + %4 = arith.cmpi eq, %dim_4, %c1 : index + %5 = scf.if %4 -> (tensor) { + %dim_7 = tensor.dim %3, %c0 : tensor + %12 = tensor.empty(%dim_7, %1) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD1, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%3 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %3 : tensor + } + + // Broadcast dimension 0 of %arg1 + %dim_5 = tensor.dim %arg1, %c0 : tensor + %6 = arith.cmpi eq, %dim_5, %c1 : index + %7 = scf.if %6 -> (tensor) { + %dim_7 = tensor.dim %arg1, %c1 : tensor + %12 = tensor.empty(%0, %dim_7) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD0, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%arg1 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %arg1 : tensor + } + + // Broadcast dimension 1 of %arg1 + %dim_6 = tensor.dim %7, %c1 : tensor + %8 = arith.cmpi eq, %dim_6, %c1 : index + %9 = scf.if %8 -> (tensor) { + %dim_7 = tensor.dim %7, %c0 : tensor + %12 = tensor.empty(%dim_7, %1) : tensor + %13 = linalg.generic { + indexing_maps = [#broadcastD1, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%7 : tensor) outs(%12 : tensor) { + ^bb0(%in: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor + scf.yield %13 : tensor + } else { + scf.yield %7 : 
tensor + } + + // Perform element-wise computation + %10 = tensor.empty(%0, %1) : tensor + %11 = linalg.generic { + indexing_maps = [#identity2D, #identity2D, #identity2D], + iterator_types = ["parallel", "parallel"] + } ins(%5, %9 : tensor, tensor) outs(%10 : tensor) { + ^bb0(%in: f32, %in_7: f32, %out: f32): + %12 = arith.addf %in, %in_7 : f32 + linalg.yield %12 : f32 + } -> tensor + return %11 : tensor +} + +#matmul_accesses = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> +] +#matmul_trait = { + iterator_types = ["parallel", "parallel", "reduction"], + indexing_maps = #matmul_accesses +} + +func.func @matmul_generic(%arg0: tensor, %arg1: tensor) -> tensor { + %cf0 = arith.constant 0.0 : f32 + %ci0 = arith.constant 0 : index + %ci1 = arith.constant 1 : index + %d0 = tensor.dim %arg0, %ci0 : tensor + %d1 = tensor.dim %arg1, %ci1 : tensor + %splat = tensor.splat %cf0[%d0, %d1] : tensor + %0 = linalg.generic #matmul_trait ins(%arg0, %arg1 : tensor, tensor) outs(%splat : tensor) { + ^bb0(%in: f32, %in_0: f32, %out: f32): + %1 = arith.mulf %in, %in_0 : f32 + %2 = arith.addf %out, %1 : f32 + linalg.yield %2 : f32 + } -> tensor + return %0 : tensor +} + +func.func @matmul_named(%arg0: tensor, %arg1: tensor) -> tensor { + %cf0 = arith.constant 0.0 : f32 + %ci0 = arith.constant 0 : index + %ci1 = arith.constant 1 : index + %d0 = tensor.dim %arg0, %ci0 : tensor + %d1 = tensor.dim %arg1, %ci1 : tensor + %splat = tensor.splat %cf0[%d0, %d1] : tensor + %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%splat : tensor) -> tensor + return %0 : tensor +} + +#conv_trait = { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 * 3 + d2, d1 * 4 + d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction", "reduction"] +} + +func.func @conv(%arg0: tensor<16x29xf32>, %arg1: tensor<3x4xf32>) -> (tensor<5x7xf32>) { + %c0 = 
arith.constant 0.0 : f32 + %splat = tensor.splat %c0 : tensor<5x7xf32> + %result = linalg.generic #conv_trait ins(%arg0, %arg1 : tensor<16x29xf32>, tensor<3x4xf32>) outs(%splat : tensor<5x7xf32>) { + ^bb0(%in: f32, %in_64: f32, %out: f32): + %5 = arith.mulf %in, %in_64 : f32 + %6 = arith.addf %out, %5 : f32 + linalg.yield %6 : f32 + } -> tensor<5x7xf32> + return %result : tensor<5x7xf32> +} + +#reverse_trait = { + indexing_maps = [ + affine_map<(i) -> (3 - i)>, + affine_map<(i) -> (i)> + ], + iterator_types = ["parallel"] +} + +func.func @reverse_from_3(%arg0: tensor) -> (tensor) { + %cf0 = arith.constant 0.0 : f32 + %ci0 = arith.constant 0 : index + %d0 = tensor.dim %arg0, %ci0 : tensor + %splat = tensor.splat %cf0[%d0] : tensor + %result = linalg.generic #reverse_trait ins(%arg0: tensor) outs(%splat: tensor) { + ^bb0(%a: f32, %b: f32): + linalg.yield %a : f32 + } -> tensor + return %result : tensor +} diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 6d8aa290e82f25..6fad249a0b2fba 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -20,7 +20,6 @@ add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Target) -add_subdirectory(Tools) add_subdirectory(Transforms) if(MLIR_ENABLE_EXECUTION_ENGINE) diff --git a/mlir/unittests/Tools/CMakeLists.txt b/mlir/unittests/Tools/CMakeLists.txt deleted file mode 100644 index a97588d9286685..00000000000000 --- a/mlir/unittests/Tools/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(lsp-server-support) diff --git a/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt b/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt deleted file mode 100644 index 3aa8b9c4bc7735..00000000000000 --- a/mlir/unittests/Tools/lsp-server-support/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_mlir_unittest(MLIRLspServerSupportTests - Transport.cpp -) -target_link_libraries(MLIRLspServerSupportTests - PRIVATE - 
MLIRLspServerSupportLib) diff --git a/mlir/unittests/Tools/lsp-server-support/Transport.cpp b/mlir/unittests/Tools/lsp-server-support/Transport.cpp deleted file mode 100644 index 48eae32a0fc3a4..00000000000000 --- a/mlir/unittests/Tools/lsp-server-support/Transport.cpp +++ /dev/null @@ -1,121 +0,0 @@ -//===- Transport.cpp - LSP JSON transport unit tests ----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Tools/lsp-server-support/Transport.h" -#include "mlir/Tools/lsp-server-support/Logging.h" -#include "mlir/Tools/lsp-server-support/Protocol.h" -#include "llvm/Support/FileSystem.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using namespace mlir; -using namespace mlir::lsp; -using namespace testing; - -namespace { - -TEST(TransportTest, SendReply) { - std::string out; - llvm::raw_string_ostream os(out); - JSONTransport transport(nullptr, os); - MessageHandler handler(transport); - - transport.reply(1989, nullptr); - EXPECT_THAT(out, HasSubstr("\"id\":1989")); - EXPECT_THAT(out, HasSubstr("\"result\":null")); -} - -class TransportInputTest : public Test { - std::optional inputTempFile; - std::FILE *in = nullptr; - std::string output = ""; - llvm::raw_string_ostream os; - std::optional transport = std::nullopt; - std::optional messageHandler = std::nullopt; - -protected: - TransportInputTest() : os(output) {} - - void SetUp() override { - auto tempOr = llvm::sys::fs::TempFile::create("lsp-unittest-%%%%%%.json"); - ASSERT_TRUE((bool)tempOr); - llvm::sys::fs::TempFile t = std::move(*tempOr); - inputTempFile = std::move(t); - - in = std::fopen(inputTempFile->TmpName.c_str(), "r"); - transport.emplace(in, os, JSONStreamStyle::Delimited); - messageHandler.emplace(*transport); - 
} - - void TearDown() override { - EXPECT_FALSE(inputTempFile->discard()); - EXPECT_EQ(std::fclose(in), 0); - } - - void writeInput(StringRef buffer) { - std::error_code ec; - llvm::raw_fd_ostream os(inputTempFile->TmpName, ec); - ASSERT_FALSE(ec); - os << buffer; - os.close(); - } - - StringRef getOutput() const { return output; } - MessageHandler &getMessageHandler() { return *messageHandler; } - - void runTransport() { - bool gotEOF = false; - llvm::Error err = llvm::handleErrors( - transport->run(*messageHandler), [&](const llvm::ECError &ecErr) { - gotEOF = ecErr.convertToErrorCode() == std::errc::io_error; - }); - llvm::consumeError(std::move(err)); - EXPECT_TRUE(gotEOF); - } -}; - -TEST_F(TransportInputTest, RequestWithInvalidParams) { - struct Handler { - void onMethod(const TextDocumentItem ¶ms, - mlir::lsp::Callback callback) {} - } handler; - getMessageHandler().method("invalid-params-request", &handler, - &Handler::onMethod); - - writeInput("{\"jsonrpc\":\"2.0\",\"id\":92," - "\"method\":\"invalid-params-request\",\"params\":{}}\n"); - runTransport(); - EXPECT_THAT(getOutput(), HasSubstr("error")); - EXPECT_THAT(getOutput(), HasSubstr("missing value at (root).uri")); -} - -TEST_F(TransportInputTest, NotificationWithInvalidParams) { - // JSON parsing errors are only reported via error logging. As a result, this - // test can't make any expectations -- but it prints the output anyway, by way - // of demonstration. 
- Logger::setLogLevel(Logger::Level::Error); - - struct Handler { - void onNotification(const TextDocumentItem ¶ms) {} - } handler; - getMessageHandler().notification("invalid-params-notification", &handler, - &Handler::onNotification); - - writeInput("{\"jsonrpc\":\"2.0\",\"method\":\"invalid-params-notification\"," - "\"params\":{}}\n"); - runTransport(); -} - -TEST_F(TransportInputTest, MethodNotFound) { - writeInput("{\"jsonrpc\":\"2.0\",\"id\":29,\"method\":\"ack\"}\n"); - runTransport(); - EXPECT_THAT(getOutput(), HasSubstr("\"id\":29")); - EXPECT_THAT(getOutput(), HasSubstr("\"error\"")); - EXPECT_THAT(getOutput(), HasSubstr("\"message\":\"method not found: ack\"")); -} -} // namespace diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 3eb0f67bbd88a5..6a6f8fc1341002 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -11104,6 +11104,7 @@ cc_library( ":FuncTransforms", ":GPUDialect", ":IR", + ":IndexDialect", ":LinalgDialect", ":LinalgPassIncGen", ":LinalgStructuredOpsIncGen", @@ -11115,6 +11116,7 @@ cc_library( ":MeshShardingInterface", ":MeshTransforms", ":Pass", + ":RuntimeVerifiableOpInterface", ":SCFDialect", ":SCFTransforms", ":SCFUtils",