diff --git a/.ci/monolithic-linux.sh b/.ci/monolithic-linux.sh index 9e670c447fbadd..b347c443da677f 100755 --- a/.ci/monolithic-linux.sh +++ b/.ci/monolithic-linux.sh @@ -18,7 +18,7 @@ set -o pipefail MONOREPO_ROOT="${MONOREPO_ROOT:="$(git rev-parse --show-toplevel)"}" BUILD_DIR="${BUILD_DIR:=${MONOREPO_ROOT}/build}" -rm -rf ${BUILD_DIR} +rm -rf "${BUILD_DIR}" ccache --zero-stats @@ -37,8 +37,8 @@ projects="${1}" targets="${2}" echo "--- cmake" -pip install -q -r ${MONOREPO_ROOT}/mlir/python/requirements.txt -cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \ +pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt +cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_PROJECTS="${projects}" \ -G Ninja \ -D CMAKE_BUILD_TYPE=Release \ diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index 52ba13036f9159..4fd88ea81c84a8 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -19,7 +19,7 @@ set -o pipefail MONOREPO_ROOT="${MONOREPO_ROOT:="$(git rev-parse --show-toplevel)"}" BUILD_DIR="${BUILD_DIR:=${MONOREPO_ROOT}/build}" -rm -rf ${BUILD_DIR} +rm -rf "${BUILD_DIR}" if [[ -n "${CLEAR_CACHE:-}" ]]; then echo "clearing sccache" @@ -37,14 +37,14 @@ projects="${1}" targets="${2}" echo "--- cmake" -pip install -q -r ${MONOREPO_ROOT}/mlir/python/requirements.txt +pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt # The CMAKE_*_LINKER_FLAGS to disable the manifest come from research # on fixing a build reliability issue on the build server, please # see https://github.com/llvm/llvm-project/pull/82393 and # https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840/40 # for further information. -cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \ +cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_PROJECTS="${projects}" \ -G Ninja \ -D CMAKE_BUILD_TYPE=Release \ diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index fdadef9dcd3848..c9e037c225dd41 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3547,7 +3547,7 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) { if (!isMultiEntry()) return nullptr; - uint64_t NumEntries = 0; + uint64_t NumEntries = 1; if (hasCFG()) { for (BinaryBasicBlock *BB : BasicBlocks) { MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB); @@ -3580,7 +3580,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { return 0; // Check all secondary entries available as either basic blocks or labels. - uint64_t NumEntries = 0; + uint64_t NumEntries = 1; for (const BinaryBasicBlock *BB : BasicBlocks) { MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB); if (!EntrySymbol) @@ -3589,7 +3589,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const { return NumEntries; ++NumEntries; } - NumEntries = 0; + NumEntries = 1; for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) { MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second); if (!EntrySymbol) diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 6fcc4a956fa1a1..0f082086c1fc24 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -25,6 +25,25 @@ extern llvm::cl::opt<bool> ProfileUseDFS; namespace llvm { namespace bolt { +/// Set CallSiteInfo destination fields from \p Symbol and return a target +/// BinaryFunction for that symbol. 
+static const BinaryFunction *setCSIDestination(const BinaryContext &BC, + yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol) { + CSI.DestId = 0; // designated for unknown functions + CSI.EntryDiscriminator = 0; + if (Symbol) { + uint64_t EntryID = 0; + if (const BinaryFunction *const Callee = + BC.getFunctionForSymbol(Symbol, &EntryID)) { + CSI.DestId = Callee->getFunctionNumber(); + CSI.EntryDiscriminator = EntryID; + return Callee; + } + } + return nullptr; +} + yaml::bolt::BinaryFunctionProfile YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) { yaml::bolt::BinaryFunctionProfile YamlBF; @@ -79,31 +98,20 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) { continue; for (const IndirectCallProfile &CSP : ICSP.get()) { StringRef TargetName = ""; - CSI.DestId = 0; // designated for unknown functions - CSI.EntryDiscriminator = 0; - if (CSP.Symbol) { - const BinaryFunction *Callee = BC.getFunctionForSymbol(CSP.Symbol); - if (Callee) { - CSI.DestId = Callee->getFunctionNumber(); - TargetName = Callee->getOneName(); - } - } + const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol); + if (Callee) + TargetName = Callee->getOneName(); CSI.Count = CSP.Count; CSI.Mispreds = CSP.Mispreds; CSTargets.emplace_back(TargetName, CSI); } } else { // direct call or a tail call - uint64_t EntryID = 0; - CSI.DestId = 0; StringRef TargetName = ""; const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr); const BinaryFunction *const Callee = - BC.getFunctionForSymbol(CalleeSymbol, &EntryID); - if (Callee) { - CSI.DestId = Callee->getFunctionNumber(); - CSI.EntryDiscriminator = EntryID; + setCSIDestination(BC, CSI, CalleeSymbol); + if (Callee) TargetName = Callee->getOneName(); - } auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) { return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull); diff --git a/bolt/test/X86/yaml-secondary-entry-discriminator.s b/bolt/test/X86/yaml-secondary-entry-discriminator.s new file mode 100644 index 00000000000000..43c2e2a7f05549 --- /dev/null +++ b/bolt/test/X86/yaml-secondary-entry-discriminator.s @@ -0,0 +1,74 @@ +# This reproduces a bug with BOLT setting incorrect discriminator for +# secondary entry points in YAML profile. 
+ +# REQUIRES: system-linux +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata -w %t.yaml --print-profile \ +# RUN: --print-only=main | FileCheck %s --check-prefix=CHECK-CFG +# RUN: FileCheck %s -input-file %t.yaml +# CHECK: - name: main +# CHECK-NEXT: fid: 2 +# CHECK-NEXT: hash: 0xADF270D550151185 +# CHECK-NEXT: exec: 0 +# CHECK-NEXT: nblocks: 4 +# CHECK-NEXT: blocks: +# CHECK: - bid: 1 +# CHECK-NEXT: insns: 1 +# CHECK-NEXT: hash: 0x36A303CBA4360014 +# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1 } ] +# CHECK: - bid: 2 +# CHECK-NEXT: insns: 5 +# CHECK-NEXT: hash: 0x8B2F5747CD0019 +# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ] + +# Make sure that the profile is attached correctly +# RUN: llvm-bolt %t.exe -o %t.out --data %t.yaml --print-profile \ +# RUN: --print-only=main | FileCheck %s --check-prefix=CHECK-CFG + +# CHECK-CFG: Binary Function "main" after attaching profile { +# CHECK-CFG: callq secondary_entry # Offset: [[#]] # Count: 1 +# CHECK-CFG: callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) : +# CHECK-CFG-NEXT: { secondary_entry: 1 (1 misses) } + +.globl func +.type func, @function +func: +# FDATA: 0 [unknown] 0 1 func 0 1 0 + .cfi_startproc + pushq %rbp + movq %rsp, %rbp +.globl secondary_entry +secondary_entry: + popq %rbp + retq + nopl (%rax) + .cfi_endproc + .size func, .-func + +.globl main +.type main, @function +main: + .cfi_startproc + pushq %rbp + movq %rsp, %rbp + subq $16, %rsp + movl $0, -4(%rbp) + testq %rax, %rax + jne Lindcall +Lcall: + call secondary_entry +# FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1 +Lindcall: + callq *%rax +# FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1 + xorl %eax, %eax + addq $16, %rsp + popq %rbp + retq +# For relocations against .text + call exit + .cfi_endproc + .size main, .-main diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 128cf45f3179bc..7d72fb06320db7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -289,6 +289,9 @@ Improvements to Clang's diagnostics annotated with the ``clang::always_destroy`` attribute. Fixes #GH68686, #GH86486 +- ``-Wmicrosoft``, ``-Wgnu``, or ``-pedantic`` is now required to diagnose C99 + flexible array members in a union or alone in a struct. Fixes GH#84565. 
+ Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f4ed3e892bc076..5c6e62e59721d9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6464,9 +6464,6 @@ def ext_c99_flexible_array_member : Extension< def err_flexible_array_virtual_base : Error< "flexible array member %0 not allowed in " "%select{struct|interface|union|class|enum}1 which has a virtual base class">; -def err_flexible_array_empty_aggregate : Error< - "flexible array member %0 not allowed in otherwise empty " - "%select{struct|interface|union|class|enum}1">; def err_flexible_array_has_nontrivial_dtor : Error< "flexible array member %0 of type %1 with non-trivial destruction">; def ext_flexible_array_in_struct : Extension< @@ -6481,8 +6478,6 @@ def ext_flexible_array_empty_aggregate_ms : Extension< "flexible array member %0 in otherwise empty " "%select{struct|interface|union|class|enum}1 is a Microsoft extension">, InGroup<MicrosoftFlexibleArray>; -def err_flexible_array_union : Error< - "flexible array member %0 in a union is not allowed">; def ext_flexible_array_union_ms : Extension< "flexible array member %0 in a union is a Microsoft extension">, InGroup<MicrosoftFlexibleArray>; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index ef8842a45ae4a1..d17e53847ebe7b 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9601,11 +9601,11 @@ static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) { ASTContext::BuiltinVectorTypeInfo Info = Context.getBuiltinVectorTypeInfo(Ty); - unsigned EltSize = Context.getTypeSize(Info.ElementType); + uint64_t EltSize = Context.getTypeSize(Info.ElementType); if (Info.ElementType == Context.BoolTy) EltSize = 1; - unsigned MinElts = Info.EC.getKnownMinValue(); + uint64_t MinElts = Info.EC.getKnownMinValue(); return VScale->first * MinElts * EltSize; } diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index b873bc6737bb0a..c3774d0cb75edc 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -289,7 +289,7 @@ class ComplexExprEmitter const BinOpInfo &Op); QualType GetHigherPrecisionFPType(QualType ElementType) { - const auto *CurrentBT = dyn_cast<BuiltinType>(ElementType); + const auto *CurrentBT = cast<BuiltinType>(ElementType); switch (CurrentBT->getKind()) { case BuiltinType::Kind::Float16: return CGF.getContext().FloatTy; diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 00b04723f17dd2..3eadb19bd2058f 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -274,7 +274,7 @@ void AIXTargetCodeGenInfo::setTargetAttributes( if (!isa<llvm::GlobalVariable>(GV)) return; - auto *GVar = dyn_cast<llvm::GlobalVariable>(GV); + auto *GVar = cast<llvm::GlobalVariable>(GV); auto GVId = GV->getName(); // Is this a global variable specified by the user as toc-data? 
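Reviewer note on the casting cleanups above (CGExprComplex.cpp, PPC.cpp; the same change recurs in SemaTemplate.cpp below): when a prior check or an invariant already guarantees the dynamic type, LLVM style prefers `cast<>` (which asserts on mismatch) over `dyn_cast<>` (which returns null and implies an unhandled failure path). A minimal sketch of the idiom; `llvm/Support/Casting.h` and its `isa`/`cast`/`dyn_cast` are LLVM's real utilities, while the `Shape`/`Circle` hierarchy is made up for illustration:

```cpp
#include "llvm/Support/Casting.h"

struct Shape {
  enum Kind { SK_Circle, SK_Square } TheKind;
  Shape(Kind K) : TheKind(K) {}
};
struct Circle : Shape {
  Circle() : Shape(SK_Circle) {}
  // LLVM-style RTTI hook used by isa<>/cast<>/dyn_cast<>.
  static bool classof(const Shape *S) { return S->TheKind == SK_Circle; }
};

void visit(Shape *S) {
  if (!llvm::isa<Circle>(S))
    return;
  // The isa<> guard guarantees the type, so cast<> is correct here: it
  // asserts on a mismatch instead of silently returning null the way
  // dyn_cast<> would, turning a latent null dereference into a loud assert.
  Circle *C = llvm::cast<Circle>(S);
  (void)C;
}
```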
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 8d2baa4eb763df..e2b06f3c1492a9 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -770,8 +770,9 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC, void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs, const InputInfo &Output, const InputInfo &Input, bool IsThinLTO) { - const bool IsOSAIX = ToolChain.getTriple().isOSAIX(); - const bool IsAMDGCN = ToolChain.getTriple().isAMDGCN(); + const llvm::Triple &Triple = ToolChain.getTriple(); + const bool IsOSAIX = Triple.isOSAIX(); + const bool IsAMDGCN = Triple.isAMDGCN(); const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath()); const Driver &D = ToolChain.getDriver(); bool ClosedNeeded = @@ -782,7 +783,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, const bool IsUnifiedLTO = Args.hasArg(options::OPT_funified_lto); if (llvm::sys::path::filename(Linker) != "ld.lld" && llvm::sys::path::stem(Linker) != "ld.lld" && !ClosedNeeded && - !ToolChain.getTriple().isOSOpenBSD()) { + !Triple.isOSOpenBSD()) { // Tell the linker to load the plugin. This has to come before // AddLinkerInputs as gold requires -plugin and AIX ld requires -bplugin to // come before any -plugin-opt/-bplugin_opt that -Wl might forward. @@ -851,7 +852,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, // the plugin. // Handle flags for selecting CPU variants. - std::string CPU = getCPUName(D, Args, ToolChain.getTriple()); + std::string CPU = getCPUName(D, Args, Triple); if (!CPU.empty()) CmdArgs.push_back( Args.MakeArgString(Twine(PluginOptPrefix) + ExtraDash + "mcpu=" + CPU)); @@ -982,10 +983,9 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, bool HasRoptr = Args.hasFlag(options::OPT_mxcoff_roptr, options::OPT_mno_xcoff_roptr, false); StringRef OptStr = HasRoptr ? "-mxcoff-roptr" : "-mno-xcoff-roptr"; - if (!IsOSAIX) D.Diag(diag::err_drv_unsupported_opt_for_target) - << OptStr << ToolChain.getTriple().str(); + << OptStr << Triple.str(); if (HasRoptr) { // The data sections option is on by default on AIX. We only need to error @@ -1048,7 +1048,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, } if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, - ToolChain.getTriple().hasDefaultEmulatedTLS())) { + Triple.hasDefaultEmulatedTLS())) { CmdArgs.push_back( Args.MakeArgString(Twine(PluginOptPrefix) + "-emulated-tls")); } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 4c83a7a3a323be..b9144cf55452e2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4827,6 +4827,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, Right.is(TT_TemplateOpener)) { return true; } + if (Left.is(tok::identifier) && Right.is(tok::numeric_constant) && + Right.TokenText[0] == '.') { + return false; + } } else if (Style.isProto()) { if (Right.is(tok::period) && Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp index f8f5d8d53d5691..6476c5107cb5cc 100644 --- a/clang/lib/InstallAPI/Visitor.cpp +++ b/clang/lib/InstallAPI/Visitor.cpp @@ -255,7 +255,7 @@ bool InstallAPIVisitor::VisitFunctionDecl(const FunctionDecl *D) { return true; // Skip methods in CXX RecordDecls. 
- for (auto P : D->getASTContext().getParents(*M)) { + for (const DynTypedNode &P : D->getASTContext().getParents(*M)) { if (P.get<CXXRecordDecl>()) return true; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8b44d24f5273aa..0bd88ece2aa544 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -19429,15 +19429,11 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl, } else if (Record->isUnion()) DiagID = getLangOpts().MicrosoftExt ? diag::ext_flexible_array_union_ms - : getLangOpts().CPlusPlus - ? diag::ext_flexible_array_union_gnu - : diag::err_flexible_array_union; + : diag::ext_flexible_array_union_gnu; else if (NumNamedMembers < 1) DiagID = getLangOpts().MicrosoftExt ? diag::ext_flexible_array_empty_aggregate_ms - : getLangOpts().CPlusPlus - ? diag::ext_flexible_array_empty_aggregate_gnu - : diag::err_flexible_array_empty_aggregate; + : diag::ext_flexible_array_empty_aggregate_gnu; if (DiagID) Diag(FD->getLocation(), DiagID) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 2b4805d62d07d0..dce225a7204da8 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -2329,11 +2329,11 @@ void InitListChecker::CheckStructUnionTypes( break; } - // We've already initialized a member of a union. We're done. + // We've already initialized a member of a union. We can stop entirely. if (InitializedSomething && RD->isUnion()) - break; + return; - // If we've hit the flexible array member at the end, we're done. + // Stop if we've hit a flexible array member. if (Field->getType()->isIncompleteArrayType()) break; @@ -2456,6 +2456,11 @@ void InitListChecker::CheckStructUnionTypes( else CheckImplicitInitList(MemberEntity, IList, Field->getType(), Index, StructuredList, StructuredIndex); + + if (RD->isUnion() && StructuredList) { + // Initialize the first field within the union. + StructuredList->setInitializedFieldInUnion(*Field); + } } /// Expand a field designator that refers to a member of an diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 005529a53270c3..aab72dbaf48c46 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2974,7 +2974,7 @@ void DeclareImplicitDeductionGuidesForTypeAlias( if (auto *FPrime = SemaRef.InstantiateFunctionDeclaration( F, TemplateArgListForBuildingFPrime, AliasTemplate->getLocation(), Sema::CodeSynthesisContext::BuildingDeductionGuides)) { - auto *GG = dyn_cast<CXXDeductionGuideDecl>(FPrime); + auto *GG = cast<CXXDeductionGuideDecl>(FPrime); buildDeductionGuide(SemaRef, AliasTemplate, FPrimeTemplateParamList, GG->getCorrespondingConstructor(), GG->getExplicitSpecifier(), GG->getTypeSourceInfo(), diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 97f8445bf819c8..9a55881f644254 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -5514,9 +5514,9 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate( QualType Obj2Ty; if (TPOC == TPOC_Call) { const FunctionProtoType *Proto1 = - FD1->getType()->getAs<FunctionProtoType>(); + FD1->getType()->castAs<FunctionProtoType>(); const FunctionProtoType *Proto2 = - FD2->getType()->getAs<FunctionProtoType>(); + FD2->getType()->castAs<FunctionProtoType>(); // - In the context of a function call, the function parameter types are // used. 
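To make the SemaDecl/SemaInit changes above concrete before the tests that follow: the patch downgrades two hard errors to extension diagnostics, and records the initialized union field instead of walking past it. A hedged sketch (not taken from the patch; type names are invented) of the constructs that now compile silently by default and are diagnosed only under -Wgnu, -Wmicrosoft (in -fms-compatibility mode), or -pedantic:

```cpp
// Flexible array member in a union: previously a hard error in C,
// now ext_flexible_array_union_gnu / ext_flexible_array_union_ms.
union U {
  int i;
  short tail[];
};

// Flexible array member alone in an otherwise empty struct: previously
// err_flexible_array_empty_aggregate, now likewise an extension warning.
struct S {
  char tail[];
};

// Per the SemaInit change, initializing one member of a union records that
// field via setInitializedFieldInUnion() and stops, rather than continuing
// to walk the remaining members (which could reach the flexible array).
U u = {42};
```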
diff --git a/clang/test/C/drs/dr5xx.c b/clang/test/C/drs/dr5xx.c index 68bcef78baccd7..13464f78b6a654 100644 --- a/clang/test/C/drs/dr5xx.c +++ b/clang/test/C/drs/dr5xx.c @@ -29,7 +29,7 @@ void dr502(void) { */ struct t { int i; - struct { int a[]; }; /* expected-error {{flexible array member 'a' not allowed in otherwise empty struct}} + struct { int a[]; }; /* expected-warning {{flexible array member 'a' in otherwise empty struct is a GNU extension}} c89only-warning {{flexible array members are a C99 feature}} expected-warning {{'' may not be nested in a struct due to flexible array member}} */ diff --git a/clang/test/CodeGen/flexible-array-init.c b/clang/test/CodeGen/flexible-array-init.c index bae926da5feb07..15a30c15ac966e 100644 --- a/clang/test/CodeGen/flexible-array-init.c +++ b/clang/test/CodeGen/flexible-array-init.c @@ -3,9 +3,15 @@ struct { int x; int y[]; } a = { 1, 7, 11 }; // CHECK: @a ={{.*}} global { i32, [2 x i32] } { i32 1, [2 x i32] [i32 7, i32 11] } +struct { int y[]; } a1 = { 8, 12 }; +// CHECK: @a1 ={{.*}} global { [2 x i32] } { [2 x i32] [i32 8, i32 12] } + struct { int x; int y[]; } b = { 1, { 13, 15 } }; // CHECK: @b ={{.*}} global { i32, [2 x i32] } { i32 1, [2 x i32] [i32 13, i32 15] } +struct { int y[]; } b1 = { { 14, 16 } }; +// CHECK: @b1 ={{.*}} global { [2 x i32] } { [2 x i32] [i32 14, i32 16] } + // sizeof(c) == 8, so this global should be at least 8 bytes. struct { int x; char c; char y[]; } c = { 1, 2, { 13, 15 } }; // CHECK: @c ={{.*}} global { i32, i8, [2 x i8] } { i32 1, i8 2, [2 x i8] c"\0D\0F" } @@ -21,10 +27,79 @@ struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } e = { 1, 2 struct { int x; char y[]; } f = { 1, { 13, 15 } }; // CHECK: @f ={{.*}} global <{ i32, [2 x i8] }> <{ i32 1, [2 x i8] c"\0D\0F" }> -union { - struct { - int a; - char b[]; - } x; -} in_union = {}; -// CHECK: @in_union ={{.*}} global %union.anon zeroinitializer +struct __attribute((packed)) { short a; char z[]; } g = { 2, { 11, 13, 15 } }; +// CHECK: @g ={{.*}} <{ i16, [3 x i8] }> <{ i16 2, [3 x i8] c"\0B\0D\0F" }>, + +// Last member is the potential flexible array, unnamed initializer skips it. +struct { int a; union { int b; short x; }; int c; int d; } h = {1, 2, {}, 3}; +// CHECK: @h = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, i32 3 } +struct { int a; union { int b; short x[0]; }; int c; int d; } h0 = {1, 2, {}, 3}; +// CHECK: @h0 = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, i32 3 } +struct { int a; union { int b; short x[1]; }; int c; int d; } h1 = {1, 2, {}, 3}; +// CHECK: @h1 = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, i32 3 } +struct { + int a; + union { + int b; + struct { + struct { } __ununsed; + short x[]; + }; + }; + int c; + int d; +} hiding = {1, 2, {}, 3}; +// CHECK: @hiding = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, i32 3 } +struct { int a; union { int b; short x[]; }; int c; int d; } hf = {1, 2, {}, 3}; +// CHECK: @hf = global %struct.anon{{.*}} { i32 1, %union.anon{{.*}} { i32 2 }, i32 0, i32 3 } + +// First member is the potential flexible array, initialization requires braces. 
+struct { int a; union { short x; int b; }; int c; int d; } i = {1, 2, {}, 3}; +// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 0, i32 3 } +struct { int a; union { short x[0]; int b; }; int c; int d; } i0 = {1, {}, 2, 3}; +// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 2, i32 3 } +struct { int a; union { short x[1]; int b; }; int c; int d; } i1 = {1, {2}, {}, 3}; +// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 0, i32 3 } +struct { int a; union { short x[]; int b; }; int c; int d; } i_f = {4, {}, {}, 6}; +// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 0, i32 6 } + +// Named initializers; order doesn't matter. +struct { int a; union { int b; short x; }; int c; int d; } hn = {.a = 1, .x = 2, .c = 3}; +// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 3, i32 0 } +struct { int a; union { int b; short x[0]; }; int c; int d; } hn0 = {.a = 1, .x = {2}, .c = 3}; +// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 3, i32 0 } +struct { int a; union { int b; short x[1]; }; int c; int d; } hn1 = {.a = 1, .x = {2}, .c = 3}; +// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 3, i32 0 } + +struct { char a[]; } empty_struct = {}; +// CHECK: @empty_struct ={{.*}} global %struct.anon{{.*}} zeroinitializer, align 1 + +struct { char a[]; } empty_struct0 = {0}; +// CHECK: @empty_struct0 = global { [1 x i8] } zeroinitializer, align 1 + +union { struct { int a; char b[]; }; } struct_in_union = {}; +// CHECK: @struct_in_union = global %union.anon{{.*}} zeroinitializer, align 4 + +union { struct { int a; char b[]; }; } struct_in_union0 = {0}; +// CHECK: @struct_in_union0 = global %union.anon{{.*}} zeroinitializer, align 4 + +union { int a; char b[]; } trailing_in_union = {}; +// CHECK: @trailing_in_union = global %union.anon{{.*}} zeroinitializer, align 4 + +union { int a; char b[]; } trailing_in_union0 = {0}; +// CHECK: @trailing_in_union0 = global %union.anon{{.*}} zeroinitializer, align 4 + +union { char a[]; } only_in_union = {}; +// CHECK: @only_in_union = global %union.anon{{.*}} zeroinitializer, align 1 + +union { char a[]; } only_in_union0 = {0}; +// CHECK: @only_in_union0 = global { [1 x i8] } zeroinitializer, align 1 + +union { char a[]; int b; } first_in_union = {}; +// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } { [0 x i8] zeroinitializer, [4 x i8] undef }, align 4 + +union { char a[]; int b; } first_in_union0 = {0}; +// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } { [1 x i8] zeroinitializer, [3 x i8] undef }, align 4 + +union { char a[]; int b; } first_in_union123 = { {1, 2, 3} }; +// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 undef }, align 4 diff --git a/clang/test/CodeGen/flexible-array-init.cpp b/clang/test/CodeGen/flexible-array-init.cpp new file mode 100644 index 00000000000000..d067a614e1afe5 --- /dev/null +++ b/clang/test/CodeGen/flexible-array-init.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple 
i386-unknown-unknown -x c++ -emit-llvm -o - %s | FileCheck %s + +union _u { char a[]; } u = {}; +union _u0 { char a[]; } u0 = {0}; + +// CHECK: %union._u = type { [0 x i8] } + +// CHECK: @u = global %union._u zeroinitializer, align 1 +// CHECK: @u0 = global { [1 x i8] } zeroinitializer, align 1 + +union { char a[]; } z = {}; +// CHECK: @z = internal global %union.{{.*}} zeroinitializer, align 1 +union { char a[]; } z0 = {0}; +// CHECK: @z0 = internal global { [1 x i8] } zeroinitializer, align 1 + +/* C++ requires global anonymous unions have static storage, so we have to + reference them to keep them in the IR output. */ +char keep(int pick) +{ + if (pick) + return z.a[0]; + else + return z0.a[0]; +} diff --git a/clang/test/Sema/flexible-array-in-union.c b/clang/test/Sema/flexible-array-in-union.c index 5fabfbe0b1eaab..dd5e8069665fea 100644 --- a/clang/test/Sema/flexible-array-in-union.c +++ b/clang/test/Sema/flexible-array-in-union.c @@ -1,13 +1,188 @@ -// RUN: %clang_cc1 %s -verify=c -fsyntax-only -// RUN: %clang_cc1 %s -verify -fsyntax-only -x c++ -// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -// RUN: %clang_cc1 %s -verify -fsyntax-only -fms-compatibility -x c++ +// RUN: %clang_cc1 %s -verify=stock,c -fsyntax-only +// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -x c++ +// RUN: %clang_cc1 %s -verify=stock,cpp -fsyntax-only -fms-compatibility -x c++ +// RUN: %clang_cc1 %s -verify=stock,c,gnu -fsyntax-only -Wgnu-flexible-array-union-member -Wgnu-empty-struct +// RUN: %clang_cc1 %s -verify=stock,c,microsoft -fsyntax-only -fms-compatibility -Wmicrosoft // The test checks that an attempt to initialize union with flexible array // member with an initializer list doesn't crash clang. -union { char x[]; } r = {0}; // c-error {{flexible array member 'x' in a union is not allowed}} +union { char x[]; } r = {0}; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ +struct _name1 { + int a; + union { + int b; + char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ + }; +} name1 = { + 10, + 42, /* initializes "b" */ +}; -// expected-no-diagnostics +struct _name1i { + int a; + union { + int b; + char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ + }; +} name1i = { + .a = 10, + .b = 42, +}; + +/* Initialization of flexible array in a union is never allowed. */ +struct _name2 { + int a; + union { + int b; + char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ + }; +} name2 = { + 12, + 13, + { 'c' }, /* c-warning {{excess elements in struct initializer}} + cpp-error {{excess elements in struct initializer}} + */ +}; + +/* Initialization of flexible array in a union is never allowed. 
*/ +struct _name2i { + int a; + union { + int b; + char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + stock-note {{initialized flexible array member 'x' is here}} + */ + }; +} name2i = { + .a = 12, + .b = 13, /* stock-note {{previous initialization is here}} */ + .x = { 'c' }, /* stock-error {{initialization of flexible array member is not allowed}} + c-warning {{initializer overrides prior initialization of this subobject}} + cpp-error {{initializer partially overrides prior initialization of this subobject}} + */ +}; + +/* Flexible array initialization always allowed when not in a union, + and when struct has another member. + */ +struct _okay { + int a; + char x[]; +} okay = { + 22, + { 'x', 'y', 'z' }, +}; + +struct _okayi { + int a; + char x[]; +} okayi = { + .a = 22, + .x = { 'x', 'y', 'z' }, +}; + +struct _okay0 { + int a; + char x[]; +} okay0 = { }; + +struct _flex_extension { + char x[]; /* gnu-warning {{flexible array member 'x' in otherwise empty struct is a GNU extension}} + microsoft-warning {{flexible array member 'x' in otherwise empty struct is a Microsoft extension}} + */ +} flex_extension = { + { 'x', 'y', 'z' }, +}; + +struct _flex_extensioni { + char x[]; /* gnu-warning {{flexible array member 'x' in otherwise empty struct is a GNU extension}} + microsoft-warning {{flexible array member 'x' in otherwise empty struct is a Microsoft extension}} + */ +} flex_extensioni = { + .x = { 'x', 'y', 'z' }, +}; + +struct already_hidden { + int a; + union { + int b; + struct { + struct { } __empty; // gnu-warning {{empty struct is a GNU extension}} + char x[]; + }; + }; +}; +struct still_zero_sized { + struct { } __unused; // gnu-warning {{empty struct is a GNU extension}} + int x[]; +}; + +struct warn1 { + int a; + union { + int b; + char x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ + }; +}; + +struct warn2 { + int x[]; /* gnu-warning {{flexible array member 'x' in otherwise empty struct is a GNU extension}} + microsoft-warning {{flexible array member 'x' in otherwise empty struct is a Microsoft extension}} + */ +}; + +union warn3 { + short x[]; /* gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ +}; + +struct quiet1 { + int a; + short x[]; +}; + +struct _not_at_end { + union { short x[]; }; /* stock-warning-re {{field '' with variable sized type '{{.*}}' not at the end of a struct or class is a GNU extension}} + gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ + int y; +} not_at_end = {{}, 3}; + +struct _not_at_end_s { + struct { int a; short x[]; }; /* stock-warning-re {{field '' with variable sized type '{{.*}}' not at the end of a struct or class is a GNU extension}} */ + int y; +} not_at_end_s = {{}, 3}; + +struct { + int a; + union { /* stock-warning-re {{field '' with variable sized type '{{.*}}' not at the end of a struct or class is a GNU extension}} */ + short x[]; /* stock-note {{initialized flexible array member 'x' is here}} + gnu-warning {{flexible array member 'x' in a union is a GNU extension}} + microsoft-warning {{flexible array member 'x' in a union is a Microsoft extension}} + */ + int b; + }; + 
int c; + int d; +} i_f = { 4, + {5}, /* stock-error {{initialization of flexible array member is not allowed}} */ + {}, + 6}; + +// expected-no-diagnostics diff --git a/clang/test/Sema/transparent-union.c b/clang/test/Sema/transparent-union.c index c134a7a9b1c4d0..f02c2298b51ce1 100644 --- a/clang/test/Sema/transparent-union.c +++ b/clang/test/Sema/transparent-union.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wgnu-flexible-array-union-member %s typedef union { int *ip; float *fp; @@ -131,7 +131,7 @@ union pr15134v2 { union pr30520v { void b; } __attribute__((transparent_union)); // expected-error {{field has incomplete type 'void'}} -union pr30520a { int b[]; } __attribute__((transparent_union)); // expected-error {{flexible array member 'b' in a union is not allowed}} +union pr30520a { int b[]; } __attribute__((transparent_union)); // expected-warning {{flexible array member 'b' in a union is a GNU extension}} // expected-note@+1 2 {{forward declaration of 'struct stb'}} union pr30520s { struct stb b; } __attribute__((transparent_union)); // expected-error {{field has incomplete type 'struct stb'}} diff --git a/clang/tools/clang-format/clang-format-diff.py b/clang/tools/clang-format/clang-format-diff.py index 0a2c24743678d0..3a74b90e731578 100755 --- a/clang/tools/clang-format/clang-format-diff.py +++ b/clang/tools/clang-format/clang-format-diff.py @@ -138,6 +138,7 @@ def main(): ) # Reformat files containing changes in place. + has_diff = False for filename, lines in lines_by_file.items(): if args.i and args.verbose: print("Formatting {}".format(filename)) @@ -169,7 +170,7 @@ def main(): stdout, stderr = p.communicate() if p.returncode != 0: - sys.exit(p.returncode) + return p.returncode if not args.i: with open(filename) as f: @@ -185,9 +186,12 @@ def main(): ) diff_string = "".join(diff) if len(diff_string) > 0: + has_diff = True sys.stdout.write(diff_string) - sys.exit(1) + + if has_diff: + return 1 if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d1e977dfa66af5..33dec7dae319f0 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -12075,6 +12075,7 @@ TEST_F(FormatTest, UnderstandsSquareAttributes) { verifyFormat("SomeType s [[gnu::unused]] (InitValue);"); verifyFormat("SomeType s [[using gnu: unused]] (InitValue);"); verifyFormat("[[gsl::suppress(\"clang-tidy-check-name\")]] void f() {}"); + verifyFormat("[[suppress(type.5)]] int uninitialized_on_purpose;"); verifyFormat("void f() [[deprecated(\"so sorry\")]];"); verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" " [[unused]] aaaaaaaaaaaaaaaaaaaaaaa(int i);"); diff --git a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp index 3e41f67ba922b7..17a596d712d0ca 100644 --- a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp @@ -136,7 +136,7 @@ TEST(ScudoStringsTest, CapacityIncreaseFails) { rlimit Limit = {}; EXPECT_EQ(0, getrlimit(RLIMIT_AS, &Limit)); - rlimit EmptyLimit = {.rlim_max = Limit.rlim_max}; + rlimit EmptyLimit = {.rlim_cur = 0, .rlim_max = Limit.rlim_max}; EXPECT_EQ(0, setrlimit(RLIMIT_AS, &EmptyLimit)); // Test requires that the default length is at least 6 characters. 
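Both scudo test fixes (strings_test.cpp above and vector_test.cpp below) make the same designated-initializer change. A hedged sketch of why it is behavior-preserving: members omitted from a designated initializer are value-initialized, so `.rlim_cur` was already zero; naming it simply makes the intent explicit. The helper name below is hypothetical, only `getrlimit`/`setrlimit` and `RLIMIT_AS` are real:

```cpp
#include <sys/resource.h>

// Hypothetical helper mirroring the test setup: drop the address-space soft
// limit to 0 so further allocations fail, while keeping the hard limit so
// the limit can be restored later.
int dropAddressSpaceLimit() {
  rlimit Limit = {};
  if (getrlimit(RLIMIT_AS, &Limit) != 0)
    return -1;
  // Equivalent to {.rlim_max = Limit.rlim_max}: the omitted .rlim_cur was
  // value-initialized to 0 anyway; spelling it out documents the intent.
  rlimit EmptyLimit = {.rlim_cur = 0, .rlim_max = Limit.rlim_max};
  return setrlimit(RLIMIT_AS, &EmptyLimit);
}
```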
diff --git a/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp b/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp index b7678678d8a294..add62c5a42a3e4 100644 --- a/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp @@ -55,7 +55,7 @@ TEST(ScudoVectorTest, ReallocateFails) { rlimit Limit = {}; EXPECT_EQ(0, getrlimit(RLIMIT_AS, &Limit)); - rlimit EmptyLimit = {.rlim_max = Limit.rlim_max}; + rlimit EmptyLimit = {.rlim_cur = 0, .rlim_max = Limit.rlim_max}; EXPECT_EQ(0, setrlimit(RLIMIT_AS, &EmptyLimit)); V.resize(capacity); diff --git a/compiler-rt/test/tsan/signal_in_futex_wait.cpp b/compiler-rt/test/tsan/Linux/signal_in_futex_wait.cpp similarity index 94% rename from compiler-rt/test/tsan/signal_in_futex_wait.cpp rename to compiler-rt/test/tsan/Linux/signal_in_futex_wait.cpp index cf31e5467486ad..3c8804aae3d09c 100644 --- a/compiler-rt/test/tsan/signal_in_futex_wait.cpp +++ b/compiler-rt/test/tsan/Linux/signal_in_futex_wait.cpp @@ -1,6 +1,6 @@ // RUN: %clang_tsan %s -lstdc++ -o %t && %run %t 2>&1 | FileCheck %s -#include "test.h" +#include "../test.h" #include #include #include @@ -57,16 +57,13 @@ class Mutex { Mutex mutex; void *Thread(void *x) { - // fprintf(stderr, "canova here thread 0\n"); // Waiting for the futex. mutex.lock(); - // fprintf(stderr, "canova here thread 1\n"); // Finished waiting. return nullptr; } static void SigprofHandler(int signal, siginfo_t *info, void *context) { - // fprintf(stderr, "canova here sigprof handler\n"); // Unlock the futex. mutex.unlock(); } diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h index 03c7a823e6e96b..1497e32044e975 100644 --- a/libc/include/llvm-libc-macros/math-macros.h +++ b/libc/include/llvm-libc-macros/math-macros.h @@ -31,10 +31,15 @@ #define NAN __builtin_nanf("") #define FP_ILOGB0 (-INT_MAX - 1) -#define FP_ILOGBNAN INT_MAX - #define FP_LLOGB0 (-LONG_MAX - 1) + +#ifdef __FP_LOGBNAN_MIN +#define FP_ILOGBNAN (-INT_MAX - 1) +#define FP_LLOGBNAN (-LONG_MAX - 1) +#else +#define FP_ILOGBNAN INT_MAX #define FP_LLOGBNAN LONG_MAX +#endif #ifdef __FAST_MATH__ #define math_errhandling 0 diff --git a/libc/src/__support/FPUtil/BasicOperations.h b/libc/src/__support/FPUtil/BasicOperations.h index f746d7ac6ad41f..a47931bb33900a 100644 --- a/libc/src/__support/FPUtil/BasicOperations.h +++ b/libc/src/__support/FPUtil/BasicOperations.h @@ -30,36 +30,32 @@ template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0> LIBC_INLINE T fmin(T x, T y) { const FPBits<T> bitx(x), bity(y); - if (bitx.is_nan()) { + if (bitx.is_nan()) return y; - } else if (bity.is_nan()) { + if (bity.is_nan()) return x; - } else if (bitx.sign() != bity.sign()) { + if (bitx.sign() != bity.sign()) // To make sure that fmin(+0, -0) == -0 == fmin(-0, +0), whenever x and // y has different signs and both are not NaNs, we return the number // with negative sign. - return (bitx.is_neg()) ? x : y; - } else { - return (x < y ? x : y); - } + return bitx.is_neg() ? x : y; + return x < y ? x : y; } template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0> LIBC_INLINE T fmax(T x, T y) { FPBits<T> bitx(x), bity(y); - if (bitx.is_nan()) { + if (bitx.is_nan()) return y; - } else if (bity.is_nan()) { + if (bity.is_nan()) return x; - } else if (bitx.sign() != bity.sign()) { + if (bitx.sign() != bity.sign()) // To make sure that fmax(+0, -0) == +0 == fmax(-0, +0), whenever x and // y has different signs and both are not NaNs, we return the number // with positive sign. - return (bitx.is_neg() ? y : x); - } else { - return (x > y ? 
x : y); - } + return bitx.is_neg() ? y : x; + return x > y ? x : y; } template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0> diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 54de6d1603cf41..8d7ae630246fb8 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2724,14 +2724,8 @@ void LinkerDriver::link(opt::InputArgList &args) { parseFiles(files, armCmseImpLib); - // Now that we have every file, we can decide if we will need a - // dynamic symbol table. - // We need one if we were asked to export dynamic symbols or if we are - // producing a shared library. - // We also need one if any shared libraries are used and for pie executables - // (probably because the dynamic linker needs it). - config->hasDynSymTab = - !ctx.sharedFiles.empty() || config->isPic || config->exportDynamic; + // Create dynamic sections for dynamic linking and static PIE. + config->hasDynSymTab = !ctx.sharedFiles.empty() || config->isPic; script->addScriptReferencedSymbolsToSymTable(); diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index cd2b9e22ab3224..93653def328f82 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -539,8 +539,8 @@ void elf::reportDuplicate(const Symbol &sym, const InputFile *newFile, if (!d->section && !errSec && errOffset && d->value == errOffset) return; if (!d->section || !errSec) { - error("duplicate symbol: " + toString(sym) + "\n>>> defined in " + - toString(sym.file) + "\n>>> defined in " + toString(newFile)); + errorOrWarn("duplicate symbol: " + toString(sym) + "\n>>> defined in " + + toString(sym.file) + "\n>>> defined in " + toString(newFile)); return; } @@ -564,7 +564,7 @@ void elf::reportDuplicate(const Symbol &sym, const InputFile *newFile, if (!src2.empty()) msg += src2 + "\n>>> "; msg += obj2; - error(msg); + errorOrWarn(msg); } void Symbol::checkDuplicate(const Defined &other) const { diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index c5c0c8a89e2879..279423720be9d5 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -323,11 +323,7 @@ void TextOutputSection::finalize() { thunkInfo.isec = makeSyntheticInputSection(isec->getSegName(), isec->getName()); thunkInfo.isec->parent = this; - - // This code runs after dead code removal. Need to set the `live` bit - // on the thunk isec so that asserts that check that only live sections - // get written are happy. - thunkInfo.isec->live = true; + assert(thunkInfo.isec->live); StringRef thunkName = saver().save(funcSym->getName() + ".thunk." 
+ std::to_string(thunkInfo.sequence++)); diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index f820513a111ea3..7b45f7f4c39a1b 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -135,6 +135,7 @@ struct Configuration { bool emitEncryptionInfo = false; bool emitInitOffsets = false; bool emitChainedFixups = false; + bool emitRelativeMethodLists = false; bool thinLTOEmitImportsFiles; bool thinLTOEmitIndexFiles; bool thinLTOIndexOnly; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 14c111ce9685c9..65de531db04b75 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1086,6 +1086,22 @@ static bool shouldEmitChainedFixups(const InputArgList &args) { return isRequested; } +static bool shouldEmitRelativeMethodLists(const InputArgList &args) { + const Arg *arg = args.getLastArg(OPT_objc_relative_method_lists, + OPT_no_objc_relative_method_lists); + if (arg && arg->getOption().getID() == OPT_objc_relative_method_lists) + return true; + if (arg && arg->getOption().getID() == OPT_no_objc_relative_method_lists) + return false; + + // TODO: If no flag is specified, don't default to false, but instead: + // - default false on < ios14 + // - default true on >= ios14 + // For now, until this feature is confirmed stable, default to false if no + // flag is explicitly specified + return false; +} + void SymbolPatterns::clear() { literals.clear(); globs.clear(); @@ -1630,6 +1646,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, config->emitChainedFixups = shouldEmitChainedFixups(args); config->emitInitOffsets = config->emitChainedFixups || args.hasArg(OPT_init_offsets); + config->emitRelativeMethodLists = shouldEmitRelativeMethodLists(args); config->icfLevel = getICFLevel(args); config->dedupStrings = args.hasFlag(OPT_deduplicate_strings, OPT_no_deduplicate_strings, true); diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 22930d52dd1db2..5c1e07cd21b1fb 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -46,6 +46,14 @@ void lld::macho::addInputSection(InputSection *inputSection) { if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) { if (isec->isCoalescedWeak()) return; + if (config->emitRelativeMethodLists && + ObjCMethListSection::isMethodList(isec)) { + if (in.objcMethList->inputOrder == UnspecifiedInputOrder) + in.objcMethList->inputOrder = inputSectionsOrder++; + in.objcMethList->addInput(isec); + isec->parent = in.objcMethList; + return; + } if (config->emitInitOffsets && sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { in.initOffsets->addInput(isec); @@ -273,6 +281,9 @@ ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, Section &section = *make<Section>
(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0); auto isec = make<ConcatInputSection>(section, data, align); + // Since this is an explicitly created 'fake' input section, + // it should not be dead stripped. + isec->live = true; section.subsections.push_back({0, isec}); return isec; } diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 694bdf734907ba..0f389e50425a32 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -149,6 +149,7 @@ class ConcatInputSection final : public InputSection { }; // Initialize a fake InputSection that does not belong to any InputFile. +// The created ConcatInputSection will always have 'live=true' ConcatInputSection *makeSyntheticInputSection(StringRef segName, StringRef sectName, uint32_t flags = 0, @@ -342,6 +343,7 @@ constexpr const char moduleTermFunc[] = "__mod_term_func"; constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; constexpr const char objcCatList[] = "__objc_catlist"; constexpr const char objcClassList[] = "__objc_classlist"; +constexpr const char objcMethList[] = "__objc_methlist"; constexpr const char objcClassRefs[] = "__objc_classrefs"; constexpr const char objcConst[] = "__objc_const"; constexpr const char objCImageInfo[] = "__objc_imageinfo"; diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp index f736360624ebd1..2a31a5c09cdd22 100644 --- a/lld/MachO/MapFile.cpp +++ b/lld/MachO/MapFile.cpp @@ -197,18 +197,24 @@ void macho::writeMapFile() { seg->name.str().c_str(), osec->name.str().c_str()); } + // Shared function to print an array of symbols. + auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) { + for (const ConcatInputSection *isec : arr) { + for (Defined *sym : isec->symbols) { + if (!(isPrivateLabel(sym->getName()) && sym->size == 0)) + os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), + sym->size, readerToFileOrdinal[sym->getFile()], + sym->getName().str().data()); + } + } + }; + os << "# Symbols:\n"; os << "# Address\tSize \tFile Name\n"; for (const OutputSegment *seg : outputSegments) { for (const OutputSection *osec : seg->getSections()) { if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) { - for (const InputSection *isec : concatOsec->inputs) { - for (Defined *sym : isec->symbols) - if (!(isPrivateLabel(sym->getName()) && sym->size == 0)) - os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), - sym->size, readerToFileOrdinal[sym->getFile()], - sym->getName().str().data()); - } + printIsecArrSyms(concatOsec->inputs); } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); uint64_t lastAddr = 0; // strings will never start at address 0, so this @@ -237,6 +243,8 @@ void macho::writeMapFile() { printNonLazyPointerSection(os, in.got); } else if (osec == in.tlvPointers) { printNonLazyPointerSection(os, in.tlvPointers); + } else if (osec == in.objcMethList) { + printIsecArrSyms(in.objcMethList->getInputs()); } // TODO print other synthetic sections } diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h index 9fbe984e6223ec..8081605670c519 100644 --- a/lld/MachO/ObjC.h +++ b/lld/MachO/ObjC.h @@ -22,6 +22,8 @@ constexpr const char klassPropList[] = "__OBJC_$_CLASS_PROP_LIST_"; constexpr const char metaclass[] = "_OBJC_METACLASS_$_"; constexpr const char ehtype[] = "_OBJC_EHTYPE_$_"; constexpr const char ivar[] = "_OBJC_IVAR_$_"; +constexpr const char instanceMethods[] = "__OBJC_$_INSTANCE_METHODS_"; +constexpr const char classMethods[] = "__OBJC_$_CLASS_METHODS_"; constexpr const char listProprieties[] = 
"__OBJC_$_PROP_LIST_"; constexpr const char category[] = "__OBJC_$_CATEGORY_"; diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 0d8ee2a0926be2..19f8509ba714bd 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1284,6 +1284,12 @@ def fixup_chains_section : Flag<["-"], "fixup_chains_section">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group; +def objc_relative_method_lists : Flag<["-"], "objc_relative_method_lists">, + HelpText<"Emit relative method lists (more compact representation)">, + Group; +def no_objc_relative_method_lists : Flag<["-"], "no_objc_relative_method_lists">, + HelpText<"Don't emit relative method lists (use traditional representation)">, + Group; def flto_codegen_only : Flag<["-"], "flto-codegen-only">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 825242f2cc72ff..755ff270e2f7a9 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -377,7 +377,7 @@ static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, // live. Marking the isec live ensures an OutputSection is created that the // start/end symbol can refer to. assert(sym.isLive()); - isec->live = true; + assert(isec->live); // This runs after gatherInputSections(), so need to explicitly set parent // and add to inputSections. diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 0afbbd478bb9fd..6f6b66118b7a94 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -12,6 +12,7 @@ #include "ExportTrie.h" #include "InputFiles.h" #include "MachOStructs.h" +#include "ObjC.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" @@ -849,7 +850,7 @@ ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) { S_LITERAL_POINTERS | S_ATTR_NO_DEAD_STRIP, ArrayRef<uint8_t>{selrefData, wordSize}, /*align=*/wordSize); - objcSelref->live = true; + assert(objcSelref->live); objcSelref->relocs.push_back({/*type=*/target->unsignedRelocType, /*pcrel=*/false, /*length=*/3, /*offset=*/0, @@ -1975,6 +1976,241 @@ void InitOffsetsSection::setUp() { } } +ObjCMethListSection::ObjCMethListSection() + : SyntheticSection(segment_names::text, section_names::objcMethList) { + flags = S_ATTR_NO_DEAD_STRIP; + align = relativeOffsetSize; +} + +// Go through all input method lists and ensure that we have selrefs for all +// their method names. The selrefs will be needed later by ::writeTo. We need to +// create them early on here to ensure they are processed correctly by the lld +// pipeline. +void ObjCMethListSection::setUp() { + for (const ConcatInputSection *isec : inputs) { + uint32_t structSizeAndFlags = 0, structCount = 0; + readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount); + uint32_t originalStructSize = structSizeAndFlags & structSizeMask; + // Method name is immediately after header + uint32_t methodNameOff = methodListHeaderSize; + + // Loop through all methods, and ensure a selref for each of them exists. 
+ while (methodNameOff < isec->data.size()) { + const Reloc *reloc = isec->getRelocAt(methodNameOff); + assert(reloc && "Relocation expected at method list name slot"); + auto *def = dyn_cast_or_null<Defined>(reloc->referent.get()); + assert(def && "Expected valid Defined at method list name slot"); + auto *cisec = cast<CStringInputSection>(def->isec); + assert(cisec && "Expected method name to be in a CStringInputSection"); + auto methname = cisec->getStringRefAtOffset(def->value); + if (!ObjCSelRefsHelper::getSelRef(methname)) + ObjCSelRefsHelper::makeSelRef(methname); + + // Jump to method name offset in next struct + methodNameOff += originalStructSize; + } + } +} + +// Calculate section size and final offsets for where InputSections need to be +// written. +void ObjCMethListSection::finalize() { + // sectionSize will be the total size of the __objc_methlist section + sectionSize = 0; + for (ConcatInputSection *isec : inputs) { + // We can also use sectionSize as write offset for isec + assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) && + "expected __objc_methlist to be aligned by default with the " + "required section alignment"); + isec->outSecOff = sectionSize; + + isec->isFinal = true; + uint32_t relativeListSize = + computeRelativeMethodListSize(isec->data.size()); + sectionSize += relativeListSize; + + // If encoding the method list in relative offset format shrinks the size, + // then we also need to adjust symbol sizes to match the new size. Note that + // on 32bit platforms the size of the method list will remain the same when + // encoded in relative offset format. + if (relativeListSize != isec->data.size()) { + for (Symbol *sym : isec->symbols) { + assert(isa<Defined>(sym) && + "Unexpected undefined symbol in ObjC method list"); + auto *def = cast<Defined>(sym); + // There can be 0-size symbols, check if this is the case and ignore + // them. + if (def->size) { + assert( + def->size == isec->data.size() && + "Invalid ObjC method list symbol size: expected symbol size to " + "match isec size"); + def->size = relativeListSize; + } + } + } + } +} + +void ObjCMethListSection::writeTo(uint8_t *bufStart) const { + uint8_t *buf = bufStart; + for (const ConcatInputSection *isec : inputs) { + assert(buf - bufStart == long(isec->outSecOff) && + "Writing at unexpected offset"); + uint32_t writtenSize = writeRelativeMethodList(isec, buf); + buf += writtenSize; + } + assert(buf - bufStart == sectionSize && + "Written size does not match expected section size"); +} + +// Check if an InputSection is a method list. To do this we scan the +// InputSection for any symbols whose names match the patterns we expect clang +// to generate for method lists. +bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) { + const char *symPrefixes[] = {objc::symbol_names::classMethods, + objc::symbol_names::instanceMethods, + objc::symbol_names::categoryInstanceMethods, + objc::symbol_names::categoryClassMethods}; + if (!isec) + return false; + for (const Symbol *sym : isec->symbols) { + auto *def = dyn_cast_or_null<Defined>(sym); + if (!def) + continue; + for (const char *prefix : symPrefixes) { + if (def->getName().starts_with(prefix)) { + assert(def->size == isec->data.size() && + "Invalid ObjC method list symbol size: expected symbol size to " + "match isec size"); + assert(def->value == 0 && + "Offset of ObjC method list symbol must be 0"); + return true; + } + } + } + + return false; +} + +// Encode a single relative offset value. The input is the data/symbol at +// (&isec->data[inSecOff]). 
The output is written to (&buf[outSecOff]). +// 'useSelRef' indicates that we should not directly use the specified +// symbol, but instead get the selRef for the symbol and use that instead. +void ObjCMethListSection::writeRelativeOffsetForIsec( + const ConcatInputSection *isec, uint8_t *buf, uint32_t &inSecOff, + uint32_t &outSecOff, bool useSelRef) const { + const Reloc *reloc = isec->getRelocAt(inSecOff); + assert(reloc && "Relocation expected at __objc_methlist Offset"); + auto *def = dyn_cast_or_null<Defined>(reloc->referent.get()); + assert(def && "Expected all syms in __objc_methlist to be defined"); + uint32_t symVA = def->getVA(); + + if (useSelRef) { + auto *cisec = cast<CStringInputSection>(def->isec); + auto methname = cisec->getStringRefAtOffset(def->value); + ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname); + assert(selRef && "Expected all selector names to already be " + "present in __objc_selrefs"); + symVA = selRef->getVA(); + assert(selRef->data.size() == sizeof(target->wordSize) && + "Expected one selref per ConcatInputSection"); + } + + uint32_t currentVA = isec->getVA() + outSecOff; + uint32_t delta = symVA - currentVA; + write32le(buf + outSecOff, delta); + + // Move one pointer forward in the absolute method list + inSecOff += target->wordSize; + // Move one relative offset forward in the relative method list (32 bits) + outSecOff += relativeOffsetSize; +} + +// Write a relative method list to buf, return the size of the written +// information +uint32_t +ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec, + uint8_t *buf) const { + // Copy over the header, and add the "this is a relative method list" magic + // value flag + uint32_t structSizeAndFlags = 0, structCount = 0; + readMethodListHeader(isec->data.data(), structSizeAndFlags, structCount); + // Set the struct size for the relative method list + uint32_t relativeStructSizeAndFlags = + (relativeOffsetSize * pointersPerStruct) & structSizeMask; + // Carry over the old flags from the input struct + relativeStructSizeAndFlags |= structSizeAndFlags & structFlagsMask; + // Set the relative method list flag + relativeStructSizeAndFlags |= relMethodHeaderFlag; + + writeMethodListHeader(buf, relativeStructSizeAndFlags, structCount); + + assert(methodListHeaderSize + + (structCount * pointersPerStruct * target->wordSize) == + isec->data.size() && + "Invalid computed ObjC method list size"); + + uint32_t inSecOff = methodListHeaderSize; + uint32_t outSecOff = methodListHeaderSize; + + // Go through the method list and encode input absolute pointers as relative + // offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and + // outSecOff + for (uint32_t i = 0; i < structCount; i++) { + // Write the name of the method + writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, true); + // Write the type of the method + writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false); + // Write the implementation of the method + writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, false); + } + + // Expecting to have read all the data in the isec + assert(inSecOff == isec->data.size() && + "Invalid actual ObjC method list size"); + assert( + outSecOff == computeRelativeMethodListSize(inSecOff) && + "Mismatch between input & output size when writing relative method list"); + return outSecOff; +} + +// Given the size of an ObjC method list InputSection, return the size of the +// method list when encoded in relative offsets format. 
+// Given the size of an ObjC method list InputSection, return the size of the
+// method list when encoded in relative offsets format. We can do this without
+// decoding the actual data, as it can be directly inferred from the size of
+// the isec.
+uint32_t ObjCMethListSection::computeRelativeMethodListSize(
+    uint32_t absoluteMethodListSize) const {
+  uint32_t oldPointersSize = absoluteMethodListSize - methodListHeaderSize;
+  uint32_t pointerCount = oldPointersSize / target->wordSize;
+  assert(((pointerCount % pointersPerStruct) == 0) &&
+         "__objc_methlist expects method lists to have multiple-of-3 pointers");
+
+  uint32_t newPointersSize = pointerCount * relativeOffsetSize;
+  uint32_t newTotalSize = methodListHeaderSize + newPointersSize;
+
+  assert((newTotalSize <= absoluteMethodListSize) &&
+         "Expected relative method list size to be smaller than or equal to "
+         "the original size");
+  return newTotalSize;
+}
+
+// Read a method list header from buf
+void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
+                                               uint32_t &structSizeAndFlags,
+                                               uint32_t &structCount) const {
+  structSizeAndFlags = read32le(buf);
+  structCount = read32le(buf + sizeof(uint32_t));
+}
+
+// Write a method list header to buf
+void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
+                                                uint32_t structSizeAndFlags,
+                                                uint32_t structCount) const {
+  write32le(buf, structSizeAndFlags);
+  write32le(buf + sizeof(structSizeAndFlags), structCount);
+}
+
 void macho::createSyntheticSymbols() {
   auto addHeaderSymbol = [](const char *name) {
     symtab->addSynthetic(name, in.header->isec, /*value=*/0,
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 4586a4a0bf4361..e8fadfef56d4b2 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -684,6 +684,54 @@ class InitOffsetsSection final : public SyntheticSection {
   std::vector<ConcatInputSection *> sections;
 };
 
+// This SyntheticSection is for the __objc_methlist section, which contains
+// relative method lists if the -objc_relative_method_lists option is enabled.
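+//
+// The on-disk method_list_t keeps its 8-byte header (entsize-and-flags,
+// count), but each 3-pointer method entry is re-encoded as three 32-bit
+// relative offsets. For example, on a 64-bit target a 3-method list shrinks
+// from 8 + 3 * 3 * 8 = 80 bytes to 8 + 3 * 3 * 4 = 44 bytes, with the
+// 0x80000000 flag set in the header to mark the relative encoding.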
+class ObjCMethListSection final : public SyntheticSection {
+public:
+  ObjCMethListSection();
+
+  static bool isMethodList(const ConcatInputSection *isec);
+  void addInput(ConcatInputSection *isec) { inputs.push_back(isec); }
+  std::vector<ConcatInputSection *> getInputs() { return inputs; }
+
+  void setUp();
+  void finalize() override;
+  bool isNeeded() const override { return !inputs.empty(); }
+  uint64_t getSize() const override { return sectionSize; }
+  void writeTo(uint8_t *bufStart) const override;
+
+private:
+  void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags,
+                            uint32_t &structCount) const;
+  void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
+                             uint32_t structCount) const;
+  uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const;
+  void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf,
+                                  uint32_t &inSecOff, uint32_t &outSecOff,
+                                  bool useSelRef) const;
+  uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
+                                   uint8_t *buf) const;
+
+  static constexpr uint32_t methodListHeaderSize =
+      /*structSizeAndFlags*/ sizeof(uint32_t) +
+      /*structCount*/ sizeof(uint32_t);
+  // Relative method lists are supported only for 3-pointer method lists
+  static constexpr uint32_t pointersPerStruct = 3;
+  // The runtime identifies relative method lists via this magic value
+  static constexpr uint32_t relMethodHeaderFlag = 0x80000000;
+  // In the method list header, the first 2 bytes are the size of the struct
+  static constexpr uint32_t structSizeMask = 0x0000FFFF;
+  // In the method list header, the last 2 bytes are the flags for the struct
+  static constexpr uint32_t structFlagsMask = 0xFFFF0000;
+  // Relative method lists have 4-byte alignment, as all data in the
+  // InputSection is 4 bytes wide
+  static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t);
+
+  // The output size of the __objc_methlist section, computed during finalize()
+  uint32_t sectionSize = 0;
+  std::vector<ConcatInputSection *> inputs;
+};
+
 // Chained fixups are a replacement for classic dyld opcodes. In this format,
 // most of the metadata necessary for binding symbols and rebasing addresses is
 // stored directly in the memory location that will have the fixup applied.
@@ -810,6 +858,7 @@ struct InStruct {
   ObjCImageInfoSection *objCImageInfo = nullptr;
   ConcatInputSection *imageLoaderCache = nullptr;
   InitOffsetsSection *initOffsets = nullptr;
+  ObjCMethListSection *objcMethList = nullptr;
   ChainedFixupsSection *chainedFixups = nullptr;
 };
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index a18b5268fd42aa..1c054912551e3e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1292,6 +1292,8 @@ template <class LP> void Writer<LP>::run() {
   scanSymbols();
   if (in.objcStubs->isNeeded())
     in.objcStubs->setUp();
+  if (in.objcMethList->isNeeded())
+    in.objcMethList->setUp();
   scanRelocations();
   if (in.initOffsets->isNeeded())
     in.initOffsets->setUp();
@@ -1363,6 +1365,7 @@ void macho::createSyntheticSections() {
   in.unwindInfo = makeUnwindInfoSection();
   in.objCImageInfo = make<ObjCImageInfoSection>();
   in.initOffsets = make<InitOffsetsSection>();
+  in.objcMethList = make<ObjCMethListSection>();
 
   // This section contains space for just a single word, and will be used by
   // dyld to cache an address to the image loader it uses.
@@ -1372,9 +1375,7 @@ void macho::createSyntheticSections() {
       segment_names::data, section_names::data, S_REGULAR,
       ArrayRef<uint8_t>{arr, target->wordSize},
       /*align=*/target->wordSize);
-  // References from dyld are not visible to us, so ensure this section is
-  // always treated as live.
- in.imageLoaderCache->live = true; + assert(in.imageLoaderCache->live); } OutputSection *macho::firstTLVDataSection = nullptr; diff --git a/lld/test/ELF/allow-multiple-definition.s b/lld/test/ELF/allow-multiple-definition.s index 492784a3601df1..96fa2627e1bf88 100644 --- a/lld/test/ELF/allow-multiple-definition.s +++ b/lld/test/ELF/allow-multiple-definition.s @@ -9,6 +9,9 @@ # RUN: llvm-objdump --no-print-imm-hex -d %t3 | FileCheck %s # RUN: llvm-objdump --no-print-imm-hex -d %t4 | FileCheck --check-prefix=REVERT %s +# RUN: ld.lld --noinhibit-exec %t2 %t1 -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN +# WARN: warning: duplicate symbol: _bar + # RUN: ld.lld -z muldefs --fatal-warnings %t1 %t2 -o %t3 # RUN: ld.lld -z muldefs --fatal-warnings %t2 %t1 -o %t4 # RUN: llvm-objdump --no-print-imm-hex -d %t3 | FileCheck %s diff --git a/lld/test/ELF/common-gc2.s b/lld/test/ELF/common-gc2.s index fec1c4be86b5ee..1ecaef7d9af5aa 100644 --- a/lld/test/ELF/common-gc2.s +++ b/lld/test/ELF/common-gc2.s @@ -1,7 +1,9 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t -# RUN: ld.lld -gc-sections -export-dynamic %t -o %t1 -# RUN: llvm-readobj --dyn-symbols %t1 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t2.o +# RUN: ld.lld -shared -soname=t2 %t2.o -o %t2.so +# RUN: ld.lld -gc-sections -export-dynamic %t.o %t2.so -o %t +# RUN: llvm-readobj --dyn-symbols %t | FileCheck %s # CHECK: Name: bar # CHECK: Name: foo diff --git a/lld/test/ELF/executable-undefined-ignoreall.s b/lld/test/ELF/executable-undefined-ignoreall.s index cc38e17cdf619b..073b22bd84543a 100644 --- a/lld/test/ELF/executable-undefined-ignoreall.s +++ b/lld/test/ELF/executable-undefined-ignoreall.s @@ -7,8 +7,6 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o # RUN: ld.lld %t.o -o %t --unresolved-symbols=ignore-all -pie # RUN: llvm-readobj -r %t | FileCheck %s -# RUN: ld.lld %t.o -o %t --unresolved-symbols=ignore-all --export-dynamic -# RUN: llvm-readobj -r %t | FileCheck %s # CHECK: Relocations [ # CHECK-NEXT: Section ({{.*}}) .rela.plt { diff --git a/lld/test/ELF/relro-non-contiguous-script-data.s b/lld/test/ELF/relro-non-contiguous-script-data.s index fd485e89167fcc..530fc7c84eb91e 100644 --- a/lld/test/ELF/relro-non-contiguous-script-data.s +++ b/lld/test/ELF/relro-non-contiguous-script-data.s @@ -1,19 +1,21 @@ // REQUIRES: x86 +// RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t2.o +// RUN: ld.lld -shared -soname=t2 %t2.o -o %t2.so // RUN: echo "SECTIONS { \ // RUN: .dynamic : { *(.dynamic) } \ // RUN: .non_ro : { . 
+= 1; } \
// RUN:  .jcr : { *(.jcr) } \
// RUN: } " > %t.script
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-// RUN: not ld.lld --export-dynamic %t.o -o /dev/null --script=%t.script 2>&1 | FileCheck %s
+// RUN: not ld.lld %t.o %t2.so -o /dev/null --script=%t.script 2>&1 | FileCheck %s

// RUN: echo "SECTIONS { \
// RUN:  .dynamic : { *(.dynamic) } \
// RUN:  .non_ro : { BYTE(1); } \
// RUN:  .jcr : { *(.jcr) } \
// RUN: } " > %t2.script
-// RUN: not ld.lld --export-dynamic %t.o -o /dev/null --script=%t2.script 2>&1 | FileCheck %s
+// RUN: not ld.lld %t.o %t2.so -o /dev/null --script=%t2.script 2>&1 | FileCheck %s

// CHECK: error: section: .jcr is not contiguous with other relro sections

diff --git a/lld/test/ELF/riscv-undefined-weak.s b/lld/test/ELF/riscv-undefined-weak.s
index 303a27f920c57c..8a78e1f8383386 100644
--- a/lld/test/ELF/riscv-undefined-weak.s
+++ b/lld/test/ELF/riscv-undefined-weak.s
@@ -1,4 +1,6 @@
 # REQUIRES: riscv
+# RUN: llvm-mc -filetype=obj -triple=riscv64 /dev/null -o %t2.o
+# RUN: ld.lld -shared -soname=t2 %t2.o -o %t2.so
 # RUN: llvm-mc -filetype=obj -triple=riscv64 -riscv-asm-relax-branches=0 %s -o %t.o
 # RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOC %s
@@ -6,7 +8,7 @@
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=CHECK,PC %s
 # RUN: llvm-readelf -x .data %t | FileCheck --check-prefixes=HEX,HEX-WITHOUT-PLT %s
 
-# RUN: ld.lld -e absolute %t.o -o %t --export-dynamic
+# RUN: ld.lld -e absolute %t.o -o %t %t2.so
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefixes=CHECK,PLT %s
 # RUN: llvm-readelf -x .data %t | FileCheck --check-prefixes=HEX,HEX-WITH-PLT %s
@@ -34,11 +36,11 @@ absolute:
 # CHECK-LABEL: <relative>:
 # CHECK-NEXT:    11{{...}}: auipc a1, 0xfffef
 # PC-NEXT:                  addi a1, a1, -0x160
-# PLT-NEXT:                 addi a1, a1, -0x318
+# PLT-NEXT:                 addi a1, a1, -0x290
 # CHECK-LABEL: <.Lpcrel_hi1>:
 # CHECK-NEXT:    11{{...}}: auipc t1, 0xfffef
 # PC-NEXT:                  sd a2, -0x166(t1)
-# PLT-NEXT:                 sd a2, -0x31e(t1)
+# PLT-NEXT:                 sd a2, -0x296(t1)
 relative:
   la a1, target
   sd a2, target+2, t1
@@ -62,7 +64,7 @@ relative:
 ## We create a PLT entry and redirect the reference to it.
 # PLT-LABEL: <branch>:
 # PLT-NEXT:   auipc ra, 0x0
-# PLT-NEXT:   jalr 0x38(ra)
+# PLT-NEXT:   jalr 0x30(ra)
 # PLT-NEXT:   [[#%x,ADDR:]]:
 # PLT-SAME:   j 0x[[#ADDR]]
 # PLT-NEXT:   [[#%x,ADDR:]]:
@@ -84,12 +86,8 @@ branch:
 ## A plt entry is created for target, so this is the offset between the
 ## plt entry and this address.
 ##
-## S = 0x11360 (the address of the plt entry for target)
-## A = 0
-## P = 0x1343c (the address of `.`)
-##
-## S - A + P = -0x20dc = 0xffffdf24
-# HEX-WITH-PLT-SAME: 24dfffff
+## S + A - P = -0x20ec = 0xffffdf14
+# HEX-WITH-PLT-SAME: 14dfffff
 
 .data
 .p2align 3
diff --git a/lld/test/ELF/static-with-export-dynamic.s b/lld/test/ELF/static-with-export-dynamic.s
deleted file mode 100644
index b0349b85e30343..00000000000000
--- a/lld/test/ELF/static-with-export-dynamic.s
+++ /dev/null
@@ -1,32 +0,0 @@
-// REQUIRES: x86
-// RUN: llvm-mc -filetype=obj -triple=i686-unknown-cloudabi %s -o %t.o
-// RUN: ld.lld --export-dynamic %t.o -o %t
-// RUN: llvm-readobj --dyn-syms %t | FileCheck %s
-
-// Ensure that a dynamic symbol table is present when --export-dynamic
-// is passed in, even when creating statically linked executables.
-// -// CHECK: DynamicSymbols [ -// CHECK-NEXT: Symbol { -// CHECK-NEXT: Name: -// CHECK-NEXT: Value: 0x0 -// CHECK-NEXT: Size: 0 -// CHECK-NEXT: Binding: Local -// CHECK-NEXT: Type: None -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: Undefined -// CHECK-NEXT: } -// CHECK-NEXT: Symbol { -// CHECK-NEXT: Name: _start -// CHECK-NEXT: Value: -// CHECK-NEXT: Size: 0 -// CHECK-NEXT: Binding: Global -// CHECK-NEXT: Type: None -// CHECK-NEXT: Other: 0 -// CHECK-NEXT: Section: .text -// CHECK-NEXT: } -// CHECK-NEXT: ] - -.global _start -_start: - ret diff --git a/lld/test/ELF/weak-undef.s b/lld/test/ELF/weak-undef.s index 3a9d5f462c21b6..21488023a79e10 100644 --- a/lld/test/ELF/weak-undef.s +++ b/lld/test/ELF/weak-undef.s @@ -16,10 +16,11 @@ # RELOC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend # RELOC-NEXT: {{.*}} 0000000100000001 R_X86_64_64 0000000000000000 foo + 0 -# COMMON: Symbol table '.dynsym' contains 2 entries: -# COMMON-NEXT: Num: Value Size Type Bind Vis Ndx Name -# COMMON-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND -# COMMON-NEXT: 1: 0000000000000000 0 NOTYPE WEAK DEFAULT UND foo +# NORELOC-NOT: Symbol table '.dynsym' +# RELOC: Symbol table '.dynsym' contains 2 entries: +# RELOC-NEXT: Num: Value Size Type Bind Vis Ndx Name +# RELOC-NEXT: 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND +# RELOC-NEXT: 1: 0000000000000000 0 NOTYPE WEAK DEFAULT UND foo # COMMON: Hex dump of section '.data': # COMMON-NEXT: {{.*}} 00000000 00000000 # COMMON-EMPTY: diff --git a/lld/test/ELF/x86-64-dyn-rel-error.s b/lld/test/ELF/x86-64-dyn-rel-error.s index a03adf89072f31..1590045312d4a3 100644 --- a/lld/test/ELF/x86-64-dyn-rel-error.s +++ b/lld/test/ELF/x86-64-dyn-rel-error.s @@ -19,7 +19,7 @@ # CHECK-NOT: error: # RUN: ld.lld --noinhibit-exec %t.o %t2.so -o /dev/null 2>&1 | FileCheck --check-prefix=WARN %s -# RUN: not ld.lld --export-dynamic --unresolved-symbols=ignore-all %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=WARN %s +# RUN: not ld.lld --export-dynamic --unresolved-symbols=ignore-all %t.o %t2.so -o /dev/null 2>&1 | FileCheck --check-prefix=WARN %s # WARN: relocation R_X86_64_32 cannot be used against symbol 'zed'; recompile with -fPIC # WARN: relocation R_X86_64_PC32 cannot be used against symbol 'zed'; recompile with -fPIC diff --git a/lld/test/MachO/objc-relative-method-lists-simple.s b/lld/test/MachO/objc-relative-method-lists-simple.s new file mode 100644 index 00000000000000..5a77085c7d93d8 --- /dev/null +++ b/lld/test/MachO/objc-relative-method-lists-simple.s @@ -0,0 +1,249 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t && cd %t + +## Compile a64_rel_dylib.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_rel_dylib.o a64_simple_class.s + +## Test arm64 + relative method lists +# RUN: %no-lsystem-lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -objc_relative_method_lists +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_REL + +## Test arm64 + relative method lists + dead-strip +# RUN: %no-lsystem-lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -objc_relative_method_lists -dead_strip +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_REL + +## Test arm64 + traditional method lists (no relative offsets) +# RUN: %no-lsystem-lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -no_objc_relative_method_lists +# RUN: llvm-objdump --macho 
--objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_NO_REL + + +CHK_REL: Contents of (__DATA_CONST,__objc_classlist) section +CHK_REL-NEXT: _OBJC_CLASS_$_MyClass +CHK_REL: baseMethods +CHK_REL-NEXT: entsize 12 (relative) +CHK_REL-NEXT: count 3 +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_00 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_00] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_01 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_01] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_02 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_02] + +CHK_REL: Meta Class +CHK_REL-NEXT: isa 0x{{[0-9a-f]*}} _OBJC_METACLASS_$_MyClass +CHK_REL: baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *) +CHK_REL-NEXT: entsize 12 (relative) +CHK_REL-NEXT: count 3 +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) class_method_00 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) +[MyClass class_method_00] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) class_method_01 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) +[MyClass class_method_01] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) class_method_02 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) +[MyClass class_method_02] + + +CHK_NO_REL-NOT: (relative) + +CHK_NO_REL: Contents of (__DATA_CONST,__objc_classlist) section +CHK_NO_REL-NEXT: _OBJC_CLASS_$_MyClass + +CHK_NO_REL: baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *) +CHK_NO_REL-NEXT: entsize 24 +CHK_NO_REL-NEXT: count 3 +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} instance_method_00 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp -[MyClass instance_method_00] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} instance_method_01 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp -[MyClass instance_method_01] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} instance_method_02 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp -[MyClass instance_method_02] + + +CHK_NO_REL: Meta Class +CHK_NO_REL-NEXT: _OBJC_METACLASS_$_MyClass + +CHK_NO_REL: baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *) +CHK_NO_REL-NEXT: entsize 24 +CHK_NO_REL-NEXT: count 3 +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} class_method_00 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp +[MyClass class_method_00] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} class_method_01 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp +[MyClass class_method_01] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} class_method_02 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp +[MyClass class_method_02] + + +######################## Generate a64_simple_class.s ######################### +# clang -c simple_class.mm -s -o a64_simple_class.s -target arm64-apple-macos -arch arm64 -Oz + +######################## simple_class.mm ######################## +# __attribute__((objc_root_class)) +# @interface MyClass +# - (void)instance_method_00; +# - (void)instance_method_01; +# - (void)instance_method_02; +# 
+ (void)class_method_00; +# + (void)class_method_01; +# + (void)class_method_02; +# @end +# +# @implementation MyClass +# - (void)instance_method_00 {} +# - (void)instance_method_01 {} +# - (void)instance_method_02 {} +# + (void)class_method_00 {} +# + (void)class_method_01 {} +# + (void)class_method_02 {} +# @end +# +# void *_objc_empty_cache; +# void *_objc_empty_vtable; +# + +#--- objc-macros.s +.macro .objc_selector_def name + .p2align 2 +"\name": + .cfi_startproc + ret + .cfi_endproc +.endm + +#--- a64_simple_class.s +.include "objc-macros.s" + +.section __TEXT,__text,regular,pure_instructions +.build_version macos, 11, 0 + +.objc_selector_def "-[MyClass instance_method_00]" +.objc_selector_def "-[MyClass instance_method_01]" +.objc_selector_def "-[MyClass instance_method_02]" + +.objc_selector_def "+[MyClass class_method_00]" +.objc_selector_def "+[MyClass class_method_01]" +.objc_selector_def "+[MyClass class_method_02]" + +.globl __objc_empty_vtable +.zerofill __DATA,__common,__objc_empty_vtable,8,3 +.section __DATA,__objc_data +.globl _OBJC_CLASS_$_MyClass +.p2align 3, 0x0 + +_OBJC_CLASS_$_MyClass: + .quad _OBJC_METACLASS_$_MyClass + .quad 0 + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_CLASS_RO_$_MyClass + .globl _OBJC_METACLASS_$_MyClass + .p2align 3, 0x0 + +_OBJC_METACLASS_$_MyClass: + .quad _OBJC_METACLASS_$_MyClass + .quad _OBJC_CLASS_$_MyClass + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_METACLASS_RO_$_MyClass + + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: + .asciz "MyClass" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: + .asciz "class_method_00" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: + .asciz "v16@0:8" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.1: + .asciz "class_method_01" +l_OBJC_METH_VAR_NAME_.2: + .asciz "class_method_02" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_CLASS_METHODS_MyClass: + .long 24 + .long 3 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyClass class_method_00]" + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyClass class_method_01]" + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyClass class_method_02]" + .p2align 3, 0x0 + +__OBJC_METACLASS_RO_$_MyClass: + .long 3 + .long 40 + .long 40 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_CLASS_METHODS_MyClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.3: + .asciz "instance_method_00" +l_OBJC_METH_VAR_NAME_.4: + .asciz "instance_method_01" +l_OBJC_METH_VAR_NAME_.5: + .asciz "instance_method_02" + + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_INSTANCE_METHODS_MyClass: + .long 24 + .long 3 + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyClass instance_method_00]" + .quad l_OBJC_METH_VAR_NAME_.4 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyClass instance_method_01]" + .quad l_OBJC_METH_VAR_NAME_.5 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyClass instance_method_02]" + .p2align 3, 0x0 + +__OBJC_CLASS_RO_$_MyClass: + .long 2 + .long 0 + .long 0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .globl __objc_empty_cache + +.zerofill __DATA,__common,__objc_empty_cache,8,3 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 
+l_OBJC_LABEL_CLASS_$:
+    .quad   _OBJC_CLASS_$_MyClass
+    .section    __DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+    .long   0
+    .long   64
+.subsections_via_symbols
diff --git a/lldb/source/Host/common/Alarm.cpp b/lldb/source/Host/common/Alarm.cpp
index 245cdc7ae5c2da..afc770d20d7b1e 100644
--- a/lldb/source/Host/common/Alarm.cpp
+++ b/lldb/source/Host/common/Alarm.cpp
@@ -154,54 +154,60 @@ lldb::thread_result_t Alarm::AlarmThread() {
     //
    // Below we only deal with the timeout expiring and fall through for dealing
     // with the rest.
-    std::unique_lock<std::mutex> alarm_lock(m_alarm_mutex);
-    if (next_alarm) {
-      if (!m_alarm_cv.wait_until(alarm_lock, *next_alarm, predicate)) {
-        // The timeout for the next alarm expired.
-
-        // Clear the next timeout to signal that we need to recompute the next
-        // timeout.
-        next_alarm.reset();
-
-        // Iterate over all the callbacks. Call the ones that have expired
-        // and remove them from the list.
-        const TimePoint now = std::chrono::system_clock::now();
-        auto it = m_entries.begin();
-        while (it != m_entries.end()) {
-          if (it->expiration <= now) {
-            it->callback();
-            it = m_entries.erase(it);
-          } else {
-            it++;
-          }
-        }
-      }
-    } else {
-      m_alarm_cv.wait(alarm_lock, predicate);
-    }
-
-    // Fall through after waiting on the condition variable. At this point
-    // either the predicate is true or we woke up because an alarm expired.
-
-    // The alarm thread is shutting down.
-    if (m_exit) {
-      exit = true;
-      if (m_run_callbacks_on_exit) {
-        for (Entry &entry : m_entries)
-          entry.callback();
-      }
-      continue;
-    }
-
-    // A new alarm was added or an alarm expired. Either way we need to
-    // recompute when this thread should wake up for the next alarm.
-    if (m_recompute_next_alarm || !next_alarm) {
-      for (Entry &entry : m_entries) {
-        if (!next_alarm || entry.expiration < *next_alarm)
-          next_alarm = entry.expiration;
-      }
-      m_recompute_next_alarm = false;
-    }
+    llvm::SmallVector<Callback> callbacks;
+    {
+      std::unique_lock<std::mutex> alarm_lock(m_alarm_mutex);
+      if (next_alarm) {
+        if (!m_alarm_cv.wait_until(alarm_lock, *next_alarm, predicate)) {
+          // The timeout for the next alarm expired.
+
+          // Clear the next timeout to signal that we need to recompute the
+          // next timeout.
+          next_alarm.reset();
+
+          // Iterate over all the callbacks. Call the ones that have expired
+          // and remove them from the list.
+          const TimePoint now = std::chrono::system_clock::now();
+          auto it = m_entries.begin();
+          while (it != m_entries.end()) {
+            if (it->expiration <= now) {
+              callbacks.emplace_back(std::move(it->callback));
+              it = m_entries.erase(it);
+            } else {
+              it++;
+            }
+          }
+        }
+      } else {
+        m_alarm_cv.wait(alarm_lock, predicate);
+      }
+
+      // Fall through after waiting on the condition variable. At this point
+      // either the predicate is true or we woke up because an alarm expired.
+
+      // The alarm thread is shutting down.
+      if (m_exit) {
+        exit = true;
+        if (m_run_callbacks_on_exit) {
+          for (Entry &entry : m_entries)
+            callbacks.emplace_back(std::move(entry.callback));
+        }
+      }
+
+      // A new alarm was added or an alarm expired. Either way we need to
+      // recompute when this thread should wake up for the next alarm.
+      if (m_recompute_next_alarm || !next_alarm) {
+        for (Entry &entry : m_entries) {
+          if (!next_alarm || entry.expiration < *next_alarm)
+            next_alarm = entry.expiration;
+        }
+        m_recompute_next_alarm = false;
+      }
+    }
+
+    // Outside the lock, call the callbacks.
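+    // Invoking them while m_alarm_mutex is held could deadlock if a callback
+    // re-enters the Alarm API (e.g. to create or cancel an alarm), so they
+    // were collected above and are only run once the lock has been released.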
+    for (Callback &callback : callbacks)
+      callback();
   }
 
   return {};
 }
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
index 3e5ee6f6637303..d3fc487aed4333 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
@@ -3154,7 +3154,7 @@ AppleObjCRuntimeV2::TaggedPointerVendorExtended::GetClassDescriptor(
            << m_objc_debug_taggedpointer_ext_payload_lshift) >>
           m_objc_debug_taggedpointer_ext_payload_rshift);
   int64_t data_payload_signed =
-      ((int64_t)((int64_t)unobfuscated
+      ((int64_t)((uint64_t)unobfuscated
                  << m_objc_debug_taggedpointer_ext_payload_lshift) >>
        m_objc_debug_taggedpointer_ext_payload_rshift);
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 16a7fc446fbe1d..4c9d85fd9f5140 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1333,9 +1333,9 @@ class MachineIRBuilder {
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder
-  buildAtomicCmpXchgWithSuccess(Register OldValRes, Register SuccessRes,
-                                Register Addr, Register CmpVal, Register NewVal,
-                                MachineMemOperand &MMO);
+  buildAtomicCmpXchgWithSuccess(const DstOp &OldValRes, const DstOp &SuccessRes,
+                                const SrcOp &Addr, const SrcOp &CmpVal,
+                                const SrcOp &NewVal, MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal,
   /// MMO`.
@@ -1351,8 +1351,9 @@ class MachineIRBuilder {
   /// registers of the same type.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildAtomicCmpXchg(Register OldValRes, Register Addr,
-                                         Register CmpVal, Register NewVal,
+  MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes,
+                                         const SrcOp &Addr, const SrcOp &CmpVal,
+                                         const SrcOp &NewVal,
                                          MachineMemOperand &MMO);
 
   /// Build and insert `OldValRes = G_ATOMICRMW_<Opcode> Addr, Val, MMO`.
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index b7db6e0bbfb71c..b25f8eb77ee38b 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -77,7 +77,6 @@ class TBAAVerifier {
   /// Visit an instruction and return true if it is valid, return false if an
   /// invalid TBAA is attached.
   bool visitTBAAMetadata(Instruction &I, const MDNode *MD);
-  bool visitTBAAStructMetadata(Instruction &I, const MDNode *MD);
 };
 
 /// Check a function for errors, useful for use when debugging a
diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
index 346ca4ad2eb314..f05b90114d75a6 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
@@ -184,7 +184,7 @@ class BinaryCoverageReader : public CoverageMappingReader {
 private:
   std::vector<std::string> Filenames;
   std::vector<ProfileMappingRecord> MappingRecords;
-  InstrProfSymtab ProfileNames;
+  std::unique_ptr<InstrProfSymtab> ProfileNames;
   size_t CurrentRecord = 0;
   std::vector<StringRef> FunctionsFilenames;
   std::vector<CounterExpression> Expressions;
@@ -195,8 +195,9 @@ class BinaryCoverageReader : public CoverageMappingReader {
   // D69471, which can split up function records into multiple sections on ELF.
   FuncRecordsStorage FuncRecords;
 
-  BinaryCoverageReader(FuncRecordsStorage &&FuncRecords)
-      : FuncRecords(std::move(FuncRecords)) {}
+  BinaryCoverageReader(std::unique_ptr<InstrProfSymtab> Symtab,
+                       FuncRecordsStorage &&FuncRecords)
+      : ProfileNames(std::move(Symtab)), FuncRecords(std::move(FuncRecords)) {}
 
 public:
   BinaryCoverageReader(const BinaryCoverageReader &) = delete;
@@ -209,12 +210,10 @@ class BinaryCoverageReader : public CoverageMappingReader {
       SmallVectorImpl<object::BuildID> *BinaryIDs = nullptr);
 
   static Expected<std::unique_ptr<BinaryCoverageReader>>
-  createCoverageReaderFromBuffer(StringRef Coverage,
-                                 FuncRecordsStorage &&FuncRecords,
-                                 InstrProfSymtab &&ProfileNames,
-                                 uint8_t BytesInAddress,
-                                 llvm::endianness Endian,
-                                 StringRef CompilationDir = "");
+  createCoverageReaderFromBuffer(
+      StringRef Coverage, FuncRecordsStorage &&FuncRecords,
+      std::unique_ptr<InstrProfSymtab> ProfileNamesPtr, uint8_t BytesInAddress,
+      llvm::endianness Endian, StringRef CompilationDir = "");
 
   Error readNextRecord(CoverageMappingRecord &Record) override;
 };
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 25ec06a7392027..612c444faec648 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -471,6 +471,13 @@ class InstrProfSymtab {
 public:
   InstrProfSymtab() = default;
 
+  // Not copyable or movable.
+  // Consider std::unique_ptr<InstrProfSymtab> for move.
+  InstrProfSymtab(const InstrProfSymtab &) = delete;
+  InstrProfSymtab &operator=(const InstrProfSymtab &) = delete;
+  InstrProfSymtab(InstrProfSymtab &&) = delete;
+  InstrProfSymtab &operator=(InstrProfSymtab &&) = delete;
+
   /// Create InstrProfSymtab from an object file section which
   /// contains function PGO names. The section may contain raw
   /// string data or string data in compressed form. This method
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 66814d39527301..bd7496a799c579 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -86,9 +86,12 @@ template <> struct IRTraits<BasicBlock> {
 // SampleProfileProber.
 class PseudoProbeManager {
   DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
+  const ThinOrFullLTOPhase LTOPhase;
 
 public:
-  PseudoProbeManager(const Module &M) {
+  PseudoProbeManager(const Module &M,
+                     ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+      : LTOPhase(LTOPhase) {
     if (NamedMDNode *FuncInfo =
             M.getNamedMetadata(PseudoProbeDescMetadataName)) {
       for (const auto *Operand : FuncInfo->operands()) {
@@ -126,17 +129,15 @@ class PseudoProbeManager {
 
   bool profileIsValid(const Function &F, const FunctionSamples &Samples) const {
     const auto *Desc = getDesc(F);
-    if (!Desc) {
-      LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function "
-                        << F.getName() << "\n");
-      return false;
-    }
-    if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
-      LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
-                        << "\n");
-      return false;
-    }
-    return true;
+    assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc ||
+            profileIsHashMismatched(*Desc, Samples) ==
+                F.hasFnAttribute("profile-checksum-mismatch")) &&
+           "In post-link, profile checksum matching state doesn't match "
+           "function 'profile-checksum-mismatch' attribute.");
+    // The desc for an imported function is unavailable. Check the function
+    // attribute for a mismatch.
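+    // Put differently, the profile is treated as valid iff either no
+    // descriptor exists and the attribute is absent (an imported function
+    // whose checksum matched at pre-link), or a descriptor exists and its
+    // hash matches the profile: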
+ return (!Desc && !F.hasFnAttribute("profile-checksum-mismatch")) || + (Desc && !profileIsHashMismatched(*Desc, Samples)); } }; diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index c8195584ade378..9e17dcaa55925d 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1190,107 +1190,113 @@ void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef Fns) { llvm::sort(ScalarDescs, compareByVectorFnName); } +static const VecDesc VecFuncs_Accelerate[] = { +#define TLI_DEFINE_ACCELERATE_VECFUNCS +#include "llvm/Analysis/VecFuncs.def" +}; + +static const VecDesc VecFuncs_DarwinLibSystemM[] = { +#define TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS +#include "llvm/Analysis/VecFuncs.def" +}; + +static const VecDesc VecFuncs_LIBMVEC_X86[] = { +#define TLI_DEFINE_LIBMVEC_X86_VECFUNCS +#include "llvm/Analysis/VecFuncs.def" +}; + +static const VecDesc VecFuncs_MASSV[] = { +#define TLI_DEFINE_MASSV_VECFUNCS +#include "llvm/Analysis/VecFuncs.def" +}; + +static const VecDesc VecFuncs_SVML[] = { +#define TLI_DEFINE_SVML_VECFUNCS +#include "llvm/Analysis/VecFuncs.def" +}; + +static const VecDesc VecFuncs_SLEEFGNUABI_VF2[] = { +#define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX) \ + {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" +}; +static const VecDesc VecFuncs_SLEEFGNUABI_VF4[] = { +#define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX) \ + {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" +}; +static const VecDesc VecFuncs_SLEEFGNUABI_VFScalable[] = { +#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" +}; + +static const VecDesc VecFuncs_ArmPL[] = { +#define TLI_DEFINE_ARMPL_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" +}; + +const VecDesc VecFuncs_AMDLIBM[] = { +#define TLI_DEFINE_AMDLIBM_VECFUNCS +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ + {SCAL, VEC, VF, MASK, VABI_PREFIX}, +#include "llvm/Analysis/VecFuncs.def" +}; + void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( enum VectorLibrary VecLib, const llvm::Triple &TargetTriple) { switch (VecLib) { case Accelerate: { - const VecDesc VecFuncs[] = { - #define TLI_DEFINE_ACCELERATE_VECFUNCS - #include "llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_Accelerate); break; } case DarwinLibSystemM: { - const VecDesc VecFuncs[] = { - #define TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS - #include "llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_DarwinLibSystemM); break; } case LIBMVEC_X86: { - const VecDesc VecFuncs[] = { - #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS - #include "llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_LIBMVEC_X86); break; } case MASSV: { - const VecDesc VecFuncs[] = { - #define TLI_DEFINE_MASSV_VECFUNCS - #include "llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_MASSV); break; } case SVML: { - const VecDesc VecFuncs[] = { - #define TLI_DEFINE_SVML_VECFUNCS - #include 
"llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_SVML); break; } case SLEEFGNUABI: { - const VecDesc VecFuncs_VF2[] = { -#define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX) \ - {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX}, -#include "llvm/Analysis/VecFuncs.def" - }; - const VecDesc VecFuncs_VF4[] = { -#define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, VABI_PREFIX) \ - {SCAL, VEC, VF, /* MASK = */ false, VABI_PREFIX}, -#include "llvm/Analysis/VecFuncs.def" - }; - const VecDesc VecFuncs_VFScalable[] = { -#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ - {SCAL, VEC, VF, MASK, VABI_PREFIX}, -#include "llvm/Analysis/VecFuncs.def" - }; - switch (TargetTriple.getArch()) { default: break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - addVectorizableFunctions(VecFuncs_VF2); - addVectorizableFunctions(VecFuncs_VF4); - addVectorizableFunctions(VecFuncs_VFScalable); + addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF2); + addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF4); + addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalable); break; } break; } case ArmPL: { - const VecDesc VecFuncs[] = { -#define TLI_DEFINE_ARMPL_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ - {SCAL, VEC, VF, MASK, VABI_PREFIX}, -#include "llvm/Analysis/VecFuncs.def" - }; - switch (TargetTriple.getArch()) { default: break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_ArmPL); break; } break; } case AMDLIBM: { - const VecDesc VecFuncs[] = { -#define TLI_DEFINE_AMDLIBM_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \ - {SCAL, VEC, VF, MASK, VABI_PREFIX}, -#include "llvm/Analysis/VecFuncs.def" - }; - addVectorizableFunctions(VecFuncs); + addVectorizableFunctions(VecFuncs_AMDLIBM); break; } case NoLibrary: diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 07d4cb5eaa23c8..b8ba782254c370 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -930,14 +930,14 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, } MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( - Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal, - Register NewVal, MachineMemOperand &MMO) { + const DstOp &OldValRes, const DstOp &SuccessRes, const SrcOp &Addr, + const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT SuccessResTy = getMRI()->getType(SuccessRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT CmpValTy = getMRI()->getType(CmpVal); - LLT NewValTy = getMRI()->getType(NewVal); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT SuccessResTy = SuccessRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT CmpValTy = CmpVal.getLLTTy(*getMRI()); + LLT NewValTy = NewVal.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(SuccessResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); @@ -947,24 +947,25 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess( assert(OldValResTy == NewValTy && "type mismatch"); 
#endif - return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) - .addDef(OldValRes) - .addDef(SuccessRes) - .addUse(Addr) - .addUse(CmpVal) - .addUse(NewVal) - .addMemOperand(&MMO); + auto MIB = buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS); + OldValRes.addDefToMIB(*getMRI(), MIB); + SuccessRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + CmpVal.addSrcToMIB(MIB); + NewVal.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder -MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, - Register CmpVal, Register NewVal, +MachineIRBuilder::buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, + const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO) { #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT CmpValTy = getMRI()->getType(CmpVal); - LLT NewValTy = getMRI()->getType(NewVal); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT CmpValTy = CmpVal.getLLTTy(*getMRI()); + LLT NewValTy = NewVal.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(CmpValTy.isValid() && "invalid operand type"); @@ -973,12 +974,13 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, assert(OldValResTy == NewValTy && "type mismatch"); #endif - return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG) - .addDef(OldValRes) - .addUse(Addr) - .addUse(CmpVal) - .addUse(NewVal) - .addMemOperand(&MMO); + auto MIB = buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG); + OldValRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + CmpVal.addSrcToMIB(MIB); + NewVal.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e10b8bc8c5e2eb..24f69ea1b742a6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1455,6 +1455,9 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + // Freeze the index so we don't poison the clamping code we're about to emit. + Idx = DAG.getFreeze(Idx); + // Then store the inserted part. if (PartVT.isVector()) { SDValue SubStackPtr = diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index d95e34b7be1ca1..82de5b6cc6041c 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5172,9 +5172,6 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa)) TBAAVerifyHelper.visitTBAAMetadata(I, TBAA); - if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa_struct)) - TBAAVerifyHelper.visitTBAAStructMetadata(I, TBAA); - if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias)) visitAliasScopeListMetadata(MD); if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope)) @@ -7529,35 +7526,6 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { return true; } -bool TBAAVerifier::visitTBAAStructMetadata(Instruction &I, const MDNode *MD) { - CheckTBAA(MD->getNumOperands() % 3 == 0, - "tbaa.struct operands must occur in groups of three", &I, MD); - - // Each group of three operands must consist of two integers and a - // tbaa node. 
Moreover, the regions described by the offset and size
-  // operands must be non-overlapping.
-  std::optional<APInt> NextFree;
-  for (unsigned int Idx = 0; Idx < MD->getNumOperands(); Idx += 3) {
-    auto *OffsetCI =
-        mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx));
-    CheckTBAA(OffsetCI, "Offset must be a constant integer", &I, MD);
-
-    auto *SizeCI =
-        mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx + 1));
-    CheckTBAA(SizeCI, "Size must be a constant integer", &I, MD);
-
-    MDNode *TBAA = dyn_cast_or_null<MDNode>(MD->getOperand(Idx + 2));
-    CheckTBAA(TBAA, "TBAA tag missing", &I, MD);
-    visitTBAAMetadata(I, TBAA);
-
-    bool NonOverlapping = !NextFree || NextFree->ule(OffsetCI->getValue());
-    CheckTBAA(NonOverlapping, "Overlapping tbaa.struct regions", &I, MD);
-
-    NextFree = OffsetCI->getValue() + SizeCI->getValue();
-  }
-  return true;
-}
-
 char VerifierLegacyPass::ID = 0;
 INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
 
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index f4c6cbc8dd4442..005521bad6e014 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -141,7 +141,6 @@ struct ELFWriter {
 
   // TargetObjectWriter wrappers.
   bool is64Bit() const;
-  bool usesRela(const MCSectionELF &Sec) const;
 
   uint64_t align(Align Alignment);
 
@@ -260,6 +259,7 @@ class ELFObjectWriter : public MCObjectWriter {
   void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
                         const MCFragment *Fragment, const MCFixup &Fixup,
                         MCValue Target, uint64_t &FixedValue) override;
+  bool usesRela(const MCSectionELF &Sec) const;
 
   void executePostLayoutBinding(MCAssembler &Asm,
                                 const MCAsmLayout &Layout) override;
@@ -394,11 +394,6 @@ bool ELFWriter::is64Bit() const {
   return OWriter.TargetObjectWriter->is64Bit();
 }
 
-bool ELFWriter::usesRela(const MCSectionELF &Sec) const {
-  return OWriter.hasRelocationAddend() &&
-         Sec.getType() != ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
-}
-
 // Emit the ELF header.
 void ELFWriter::writeHeader(const MCAssembler &Asm) {
   // ELF Header
@@ -825,24 +820,22 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
   if (OWriter.Relocations[&Sec].empty())
     return nullptr;
 
-  const StringRef SectionName = Sec.getName();
-  bool Rela = usesRela(Sec);
-  std::string RelaSectionName = Rela ? ".rela" : ".rel";
-  RelaSectionName += SectionName;
+  unsigned Flags = ELF::SHF_INFO_LINK;
+  if (Sec.getFlags() & ELF::SHF_GROUP)
+    Flags = ELF::SHF_GROUP;
 
+  const StringRef SectionName = Sec.getName();
+  const bool Rela = OWriter.usesRela(Sec);
   unsigned EntrySize;
   if (Rela)
     EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
   else
     EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
 
-  unsigned Flags = ELF::SHF_INFO_LINK;
-  if (Sec.getFlags() & ELF::SHF_GROUP)
-    Flags = ELF::SHF_GROUP;
-
-  MCSectionELF *RelaSection = Ctx.createELFRelSection(
-      RelaSectionName, Rela ? ELF::SHT_RELA : ELF::SHT_REL, Flags, EntrySize,
-      Sec.getGroup(), &Sec);
+  MCSectionELF *RelaSection =
+      Ctx.createELFRelSection(((Rela ? ".rela" : ".rel") + SectionName),
+                              Rela ? ELF::SHT_RELA : ELF::SHT_REL, Flags,
+                              EntrySize, Sec.getGroup(), &Sec);
   RelaSection->setAlignment(is64Bit() ? Align(8) : Align(4));
   return RelaSection;
 }
 
@@ -938,11 +931,11 @@ void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
 void ELFWriter::writeRelocations(const MCAssembler &Asm,
                                  const MCSectionELF &Sec) {
   std::vector<ELFRelocationEntry> &Relocs = OWriter.Relocations[&Sec];
+  const bool Rela = OWriter.usesRela(Sec);
 
   // Sort the relocation entries. MIPS needs this.
   OWriter.TargetObjectWriter->sortRelocs(Asm, Relocs);
 
-  const bool Rela = usesRela(Sec);
   if (OWriter.TargetObjectWriter->getEMachine() == ELF::EM_MIPS) {
     for (const ELFRelocationEntry &Entry : Relocs) {
       uint32_t Symidx = Entry.Symbol ? Entry.Symbol->getIndex() : 0;
@@ -1499,7 +1492,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
   FixedValue = !RelocateWithSymbol && SymA && !SymA->isUndefined()
                    ? C + Layout.getSymbolOffset(*SymA)
                    : C;
-  if (hasRelocationAddend()) {
+  if (usesRela(FixupSection)) {
     Addend = FixedValue;
     FixedValue = 0;
   }
@@ -1528,6 +1521,11 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
   Relocations[&FixupSection].push_back(Rec);
 }
 
+bool ELFObjectWriter::usesRela(const MCSectionELF &Sec) const {
+  return hasRelocationAddend() &&
+         Sec.getType() != ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
+}
+
 bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
     const MCAssembler &Asm, const MCSymbol &SA, const MCFragment &FB,
     bool InSet, bool IsPCRel) const {
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index d328460510830a..445b48067a9755 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -894,31 +894,34 @@ static Error readCoverageMappingData(
 Expected<std::unique_ptr<BinaryCoverageReader>>
 BinaryCoverageReader::createCoverageReaderFromBuffer(
     StringRef Coverage, FuncRecordsStorage &&FuncRecords,
-    InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress,
+    std::unique_ptr<InstrProfSymtab> ProfileNamesPtr, uint8_t BytesInAddress,
     llvm::endianness Endian, StringRef CompilationDir) {
-  std::unique_ptr<BinaryCoverageReader> Reader(
-      new BinaryCoverageReader(std::move(FuncRecords)));
-  Reader->ProfileNames = std::move(ProfileNames);
+  if (ProfileNamesPtr == nullptr)
+    return make_error<CoverageMapError>(coveragemap_error::malformed,
+                                        "Caller must provide ProfileNames");
+  std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader(
+      std::move(ProfileNamesPtr), std::move(FuncRecords)));
+  InstrProfSymtab &ProfileNames = *Reader->ProfileNames;
   StringRef FuncRecordsRef = Reader->FuncRecords->getBuffer();
   if (BytesInAddress == 4 && Endian == llvm::endianness::little) {
     if (Error E = readCoverageMappingData<uint32_t, llvm::endianness::little>(
-            Reader->ProfileNames, Coverage, FuncRecordsRef,
-            Reader->MappingRecords, CompilationDir, Reader->Filenames))
+            ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords,
+            CompilationDir, Reader->Filenames))
       return std::move(E);
   } else if (BytesInAddress == 4 && Endian == llvm::endianness::big) {
     if (Error E = readCoverageMappingData<uint32_t, llvm::endianness::big>(
-            Reader->ProfileNames, Coverage, FuncRecordsRef,
-            Reader->MappingRecords, CompilationDir, Reader->Filenames))
+            ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords,
+            CompilationDir, Reader->Filenames))
      return std::move(E);
   } else if (BytesInAddress == 8 && Endian == llvm::endianness::little) {
     if (Error E = readCoverageMappingData<uint64_t, llvm::endianness::little>(
-            Reader->ProfileNames, Coverage, FuncRecordsRef,
-            Reader->MappingRecords, CompilationDir, Reader->Filenames))
+            ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords,
+            CompilationDir, Reader->Filenames))
       return std::move(E);
   } else if (BytesInAddress == 8 && Endian == llvm::endianness::big) {
     if (Error E = readCoverageMappingData<uint64_t, llvm::endianness::big>(
-            Reader->ProfileNames, Coverage, FuncRecordsRef,
-            Reader->MappingRecords, CompilationDir, Reader->Filenames))
+            ProfileNames, Coverage, FuncRecordsRef, Reader->MappingRecords,
+            CompilationDir, Reader->Filenames))
       return std::move(E);
   } else
     return make_error<CoverageMapError>(
@@ -963,8 +966,8 @@ loadTestingFormat(StringRef Data, StringRef CompilationDir) {
   if (Data.size() < ProfileNamesSize)
     return make_error<CoverageMapError>(coveragemap_error::malformed,
                                         "the size of ProfileNames is too big");
-  InstrProfSymtab ProfileNames;
-  if (Error E = ProfileNames.create(Data.substr(0, ProfileNamesSize), Address))
+  auto ProfileNames = std::make_unique<InstrProfSymtab>();
+  if (Error E = ProfileNames->create(Data.substr(0, ProfileNamesSize), Address))
     return std::move(E);
   Data = Data.substr(ProfileNamesSize);
@@ -1099,7 +1102,7 @@ loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
       OF->isLittleEndian() ? llvm::endianness::little : llvm::endianness::big;
 
   // Look for the sections that we are interested in.
-  InstrProfSymtab ProfileNames;
+  auto ProfileNames = std::make_unique<InstrProfSymtab>();
   std::vector<SectionRef> NamesSectionRefs;
   // If IPSK_name is not found, fallback to search for IPK_covname, which is
   // used when binary correlation is enabled.
@@ -1116,7 +1119,7 @@ loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
     return make_error<CoverageMapError>(
         coveragemap_error::malformed,
         "the size of coverage mapping section is not one");
-  if (Error E = ProfileNames.create(NamesSectionRefs.back()))
+  if (Error E = ProfileNames->create(NamesSectionRefs.back()))
     return std::move(E);
 
   auto CoverageSection = lookupSections(*OF, IPSK_covmap);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index bddf3d958a1ae6..6e7d34f5adaa3f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1594,6 +1594,9 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
         }
       }
 
+      if (auto *NewEltI = dyn_cast<Instruction>(NewElt))
+        NewEltI->copyIRFlags(&I);
+
       NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
     }
   } else {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 0788d0c3a72136..027ee1086bf4e0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -153,7 +153,7 @@
 ///    %__THREW__.val = __THREW__;
 ///    __THREW__ = 0;
 ///    %__threwValue.val = __threwValue;
-///    if (%__THREW__.val != 0) {
+///    if (%__THREW__.val != 0 & %__threwValue.val != 0) {
 ///      %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId);
 ///      if (%label == 0)
 ///        emscripten_longjmp(%__THREW__.val, %__threwValue.val);
@@ -712,10 +712,12 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
   BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F);
   BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F);
   BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F);
+  Value *ThrewCmp = IRB.CreateICmpNE(Threw, getAddrSizeInt(M, 0));
   Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
                                      ThrewValueGV->getName() + ".val");
-  Value *ThrewCmp = IRB.CreateICmpNE(Threw, getAddrSizeInt(M, 0));
-  IRB.CreateCondBr(ThrewCmp, ThenBB1, ElseBB1);
+  Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0));
+  Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
+  IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
 
   // Generate call.em.longjmp BB once and share it within the function
   if (!CallEmLongjmpBB) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2cbef8a7ae611d..7545a92c114ef2 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -453,6 +453,7 @@ class SampleProfileMatcher {
   Module &M;
   SampleProfileReader &Reader;
   const PseudoProbeManager *ProbeManager;
+  const ThinOrFullLTOPhase LTOPhase;
   SampleProfileMap FlattenedProfiles;
   // For each function, the matcher generates a map, of which each entry is a
   // mapping from the source location of current build to the source location
   // in the profile.
@@ -504,8 +505,9 @@ class SampleProfileMatcher {
 
 public:
   SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
-                       const PseudoProbeManager *ProbeManager)
-      : M(M), Reader(Reader), ProbeManager(ProbeManager){};
+                       const PseudoProbeManager *ProbeManager,
+                       ThinOrFullLTOPhase LTOPhase)
+      : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
   void runOnModule();
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
@@ -521,7 +523,7 @@ class SampleProfileMatcher {
       return &It->second;
     return nullptr;
   }
-  void runOnFunction(const Function &F);
+  void runOnFunction(Function &F);
   void findIRAnchors(const Function &F,
                      std::map<LineLocation, StringRef> &IRAnchors);
   void findProfileAnchors(
@@ -1911,15 +1913,22 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
   bool Changed = false;
 
   if (FunctionSamples::ProfileIsProbeBased) {
-    if (!ProbeManager->profileIsValid(F, *Samples)) {
+    LLVM_DEBUG({
+      if (!ProbeManager->getDesc(F))
+        dbgs() << "Probe descriptor missing for Function " << F.getName()
+               << "\n";
+    });
+
+    if (ProbeManager->profileIsValid(F, *Samples)) {
+      ++NumMatchedProfile;
+    } else {
+      ++NumMismatchedProfile;
       LLVM_DEBUG(
           dbgs() << "Profile is invalid due to CFG mismatch for Function "
                  << F.getName() << "\n");
-      ++NumMismatchedProfile;
       if (!SalvageStaleProfile)
         return false;
     }
-    ++NumMatchedProfile;
   } else {
     if (getFunctionLoc(F) == 0)
       return false;
@@ -2185,7 +2194,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
 
   // Load pseudo probe descriptors for probe-based function samples.
   if (Reader->profileIsProbeBased()) {
-    ProbeManager = std::make_unique<PseudoProbeManager>(M);
+    ProbeManager = std::make_unique<PseudoProbeManager>(M, LTOPhase);
     if (!ProbeManager->moduleIsProbed(M)) {
      const char *Msg =
          "Pseudo-probe-based profile requires SampleProfileProbePass";
@@ -2197,8 +2206,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
 
   if (ReportProfileStaleness || PersistProfileStaleness ||
      SalvageStaleProfile) {
-    MatchingManager =
-        std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
+    MatchingManager = std::make_unique<SampleProfileMatcher>(
+        M, *Reader, ProbeManager.get(), LTOPhase);
   }
 
   return true;
@@ -2452,7 +2461,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
   }
 }
 
-void SampleProfileMatcher::runOnFunction(const Function &F) {
+void SampleProfileMatcher::runOnFunction(Function &F) {
   // We need to use flattened function samples for matching.
   // Unlike IR, which includes all callsites from the source code, the callsites
   // in profile only show up when they are hit by samples, i.e. the profile
@@ -2481,8 +2490,16 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
   // support for pseudo-probe.
   if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
       !ProbeManager->profileIsValid(F, *FSFlattened)) {
-    // The matching result will be saved to IRToProfileLocationMap, create a new
-    // map for each function.
+    // For imported functions, the checksum metadata (pseudo_probe_desc) is
+    // dropped, so we leverage the function attribute (profile-checksum-mismatch)
+    // to transfer the info: add the attribute during the pre-link phase and
+    // check it during the post-link phase (see "profileIsValid").
+    if (FunctionSamples::ProfileIsProbeBased &&
+        LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
+      F.addFnAttr("profile-checksum-mismatch");
+
+    // The matching result will be saved to IRToProfileLocationMap, create a
+    // new map for each function.
     auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
     runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
                             IRToProfileLocationMap);
@@ -2758,8 +2775,9 @@ void SampleProfileMatcher::distributeIRToProfileLocationMap(
     FS.setIRToProfileLocationMap(&(ProfileMappings->second));
   }
 
-  for (auto &Inlinees : FS.getCallsiteSamples()) {
-    for (auto FS : Inlinees.second) {
+  for (auto &Callees :
+       const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
+    for (auto &FS : Callees.second) {
       distributeIRToProfileLocationMap(FS.second);
     }
   }
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 5d366e3d6dee0a..f7d4803ded155a 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -422,6 +422,7 @@ class HWAddressSanitizer {
   bool InstrumentLandingPads;
   bool InstrumentWithCalls;
   bool InstrumentStack;
+  bool InstrumentGlobals;
   bool DetectUseAfterScope;
   bool UsePageAliases;
   bool UseMatchAllCallback;
@@ -639,11 +640,13 @@ void HWAddressSanitizer::initializeModule() {
   // If we don't have personality function support, fall back to landing pads.
   InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime);
 
+  InstrumentGlobals =
+      !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime);
+
   if (!CompileKernel) {
     createHwasanCtorComdat();
 
-    bool InstrumentGlobals = optOr(ClGlobals, NewRuntime);
-    if (InstrumentGlobals && !UsePageAliases)
+    if (InstrumentGlobals)
       instrumentGlobals();
 
     bool InstrumentPersonalityFunctions =
@@ -787,6 +790,13 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
     if (SSI && SSI->stackAccessIsSafe(*Inst))
       return true;
   }
+
+  if (isa<GlobalVariable>(getUnderlyingObject(Ptr))) {
+    if (!InstrumentGlobals)
+      return true;
+    // TODO: Optimize inbound global accesses, like Asan `instrumentMop`.
+  }
+
   return false;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e1f26b922dbe4d..961380ce4ad9f2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13928,26 +13928,29 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
   // that feed it. The type of the loaded value may indicate a more suitable
   // width than V's type. We want to base the vector element size on the width
   // of memory operations where possible.
-  SmallVector<std::pair<Instruction *, BasicBlock *>, 16> Worklist;
+  SmallVector<std::tuple<Instruction *, BasicBlock *, unsigned>> Worklist;
   SmallPtrSet<Instruction *, 16> Visited;
   if (auto *I = dyn_cast<Instruction>(V)) {
-    Worklist.emplace_back(I, I->getParent());
+    Worklist.emplace_back(I, I->getParent(), 0);
     Visited.insert(I);
   }
 
   // Traverse the expression tree in bottom-up order looking for loads. If we
   // encounter an instruction we don't yet handle, we give up.
auto Width = 0u; + Value *FirstNonBool = nullptr; while (!Worklist.empty()) { - Instruction *I; - BasicBlock *Parent; - std::tie(I, Parent) = Worklist.pop_back_val(); + auto [I, Parent, Level] = Worklist.pop_back_val(); // We should only be looking at scalar instructions here. If the current // instruction has a vector type, skip. auto *Ty = I->getType(); if (isa<VectorType>(Ty)) continue; + if (Ty != Builder.getInt1Ty() && !FirstNonBool) + FirstNonBool = I; + if (Level > RecursionMaxDepth) + continue; // If the current instruction is a load, update MaxWidth to reflect the // width of the loaded value. @@ -13960,11 +13963,16 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { // user or the use is a PHI node, we add it to the worklist. else if (isa<PHINode, CastInst, GetElementPtrInst, CmpInst, SelectInst, BinaryOperator, UnaryOperator>(I)) { - for (Use &U : I->operands()) + for (Use &U : I->operands()) { if (auto *J = dyn_cast<Instruction>(U.get())) if (Visited.insert(J).second && - (isa<PHINode>(I) || J->getParent() == Parent)) - Worklist.emplace_back(J, J->getParent()); + (isa<PHINode>(I) || J->getParent() == Parent)) { + Worklist.emplace_back(J, J->getParent(), Level + 1); + continue; + } + if (!FirstNonBool && U.get()->getType() != Builder.getInt1Ty()) + FirstNonBool = U.get(); + } } else { break; } @@ -13974,8 +13982,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { // gave up for some reason, just return the width of V. Otherwise, return the // maximum width we found. if (!Width) { - if (auto *CI = dyn_cast<CastInst>(V)) - V = CI->getOperand(0); + if (V->getType() == Builder.getInt1Ty() && FirstNonBool) + V = FirstNonBool; Width = DL->getTypeSizeInBits(V->getType()); } @@ -14415,6 +14423,13 @@ void BoUpSLP::computeMinimumValueSizes() { unsigned MaxBitWidth = ComputeMaxBitWidth( TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot); + if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { + if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) + ReductionBitWidth = bit_ceil(MaxBitWidth); + else if (MaxBitWidth == 0) + ReductionBitWidth = 0; + } + for (unsigned Idx : RootDemotes) ToDemote.append(VectorizableTree[Idx]->Scalars.begin(), VectorizableTree[Idx]->Scalars.end()); @@ -15831,7 +15846,9 @@ class HorizontalReduction { RegMaxNumber * llvm::bit_floor(MaxVecRegSize / EltSize); unsigned ReduxWidth = std::min( - llvm::bit_floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts)); + llvm::bit_floor(NumReducedVals), + std::clamp(MaxElts, RedValsMaxNumber, + RegMaxNumber * RedValsMaxNumber)); unsigned Start = 0; unsigned Pos = Start; // Restarts vectorization attempt with lower vector factor.
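To make the shape of the SLPVectorizer change above concrete: getVectorElementSize() now carries a depth alongside each worklist entry and stops expanding entries past RecursionMaxDepth, so a pathological expression tree can no longer blow up compile time. Below is a minimal standalone C++ sketch of that depth-bounded worklist pattern, for illustration only; Node, MaxDepth, and visitBounded are invented names, not LLVM API.

// Depth-bounded worklist traversal: each node is queued together with the
// depth at which it was reached. Nodes beyond MaxDepth are still popped but
// never expanded, so the walk terminates quickly on deep expression trees.
#include <cstdio>
#include <set>
#include <tuple>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Operands;
};

static constexpr unsigned MaxDepth = 12; // stand-in for RecursionMaxDepth

static void visitBounded(Node *Root) {
  std::vector<std::tuple<Node *, unsigned>> Worklist; // (node, depth)
  std::set<Node *> Visited;
  Worklist.emplace_back(Root, 0u);
  Visited.insert(Root);
  while (!Worklist.empty()) {
    auto [N, Level] = Worklist.back();
    Worklist.pop_back();
    std::printf("visiting node %d at depth %u\n", N->Id, Level);
    if (Level > MaxDepth)
      continue; // too deep: keep what we learned, stop expanding this branch
    for (Node *Op : N->Operands)
      if (Visited.insert(Op).second) // queue each node at most once
        Worklist.emplace_back(Op, Level + 1);
  }
}

int main() {
  Node A{0, {}}, B{1, {&A}}, C{2, {&A, &B}};
  visitBounded(&C);
  return 0;
}

Carrying the depth in the work item, as the patch does with its (Instruction *, BasicBlock *, unsigned) tuple, keeps the traversal iterative and makes the cutoff explicit at the single point where entries are expanded.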
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir index 0cf9602adbb094..499c08fa4966f9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir @@ -40,11 +40,12 @@ body: | ; CHECK-LABEL: name: ldrxrox_breg_oreg ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load (s64) from %ir.addr) - ; CHECK: $x0 = COPY [[LDRXroX]] - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $x0 = COPY [[LDRXroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -65,11 +66,12 @@ body: | liveins: $d0, $x1 ; CHECK-LABEL: name: ldrdrox_breg_oreg ; CHECK: liveins: $d0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load (s64) from %ir.addr) - ; CHECK: $d0 = COPY [[LDRDroX]] - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $d0 = COPY [[LDRDroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:gpr(p0) = COPY $d0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -78,6 +80,9 @@ body: | RET_ReallyLR implicit $d0 ... 
--- +# This shouldn't be folded, since we reuse the result of the G_PTR_ADD outside +# the G_LOAD + name: more_than_one_use alignment: 4 legalized: true @@ -87,18 +92,17 @@ machineFunctionInfo: {} body: | bb.0: liveins: $x0, $x1 - ; This shouldn't be folded, since we reuse the result of the G_PTR_ADD outside - ; the G_LOAD ; CHECK-LABEL: name: more_than_one_use ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]] - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr) - ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]] - ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]] - ; CHECK: $x0 = COPY [[ADDXrr1]] - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]] + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]] + ; CHECK-NEXT: $x0 = COPY [[ADDXrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -121,11 +125,12 @@ body: | liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: ldrxrox_shl ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: $x2 = COPY [[LDRXroX]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $x2 = COPY [[LDRXroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -148,11 +153,12 @@ body: | liveins: $x0, $x1, $d2 ; CHECK-LABEL: name: ldrdrox_shl ; CHECK: liveins: $x0, $x1, $d2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: $d2 = COPY [[LDRDroX]] - ; CHECK: RET_ReallyLR implicit $d2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $d2 = COPY [[LDRDroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -175,11 +181,12 @@ body: | liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: ldrxrox_mul_rhs ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: $x2 = COPY [[LDRXroX]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX 
[[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $x2 = COPY [[LDRXroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 8 %2:gpr(s64) = G_MUL %0, %1(s64) @@ -202,11 +209,12 @@ body: | liveins: $x0, $x1, $d2 ; CHECK-LABEL: name: ldrdrox_mul_rhs ; CHECK: liveins: $x0, $x1, $d2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: $d2 = COPY [[LDRDroX]] - ; CHECK: RET_ReallyLR implicit $d2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $d2 = COPY [[LDRDroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 8 %2:gpr(s64) = G_MUL %0, %1(s64) @@ -229,11 +237,12 @@ body: | liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: ldrxrox_mul_lhs ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: $x2 = COPY [[LDRXroX]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $x2 = COPY [[LDRXroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 8 %2:gpr(s64) = G_MUL %1, %0(s64) @@ -256,11 +265,12 @@ body: | liveins: $x0, $x1, $d2 ; CHECK-LABEL: name: ldrdrox_mul_lhs ; CHECK: liveins: $x0, $x1, $d2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: $d2 = COPY [[LDRDroX]] - ; CHECK: RET_ReallyLR implicit $d2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $d2 = COPY [[LDRDroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 8 %2:gpr(s64) = G_MUL %1, %0(s64) @@ -272,6 +282,9 @@ body: | ... --- +# Show that we don't get a shifted load from a mul when we don't have a +# power of 2. (The bit isn't set on the load.) + name: mul_not_pow_2 alignment: 4 legalized: true @@ -280,19 +293,18 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that we don't get a shifted load from a mul when we don't have a - ; power of 2. (The bit isn't set on the load.) 
liveins: $x0, $x1, $d2 ; CHECK-LABEL: name: mul_not_pow_2 ; CHECK: liveins: $x0, $x1, $d2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 7 - ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[SUBREG_TO_REG]], [[COPY]], $xzr - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load (s64) from %ir.addr) - ; CHECK: $d2 = COPY [[LDRDroX]] - ; CHECK: RET_ReallyLR implicit $d2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 7 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[SUBREG_TO_REG]], [[COPY]], $xzr + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $d2 = COPY [[LDRDroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 7 %2:gpr(s64) = G_MUL %1, %0(s64) @@ -304,6 +316,9 @@ body: | ... --- +# Show that we don't get a shifted load from a mul when we don't have +# the right power of 2. (The bit isn't set on the load.) + name: mul_wrong_pow_2 alignment: 4 legalized: true @@ -312,19 +327,18 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that we don't get a shifted load from a mul when we don't have - ; the right power of 2. (The bit isn't set on the load.) liveins: $x0, $x1, $d2 ; CHECK-LABEL: name: mul_wrong_pow_2 ; CHECK: liveins: $x0, $x1, $d2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 16 - ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 - ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[SUBREG_TO_REG]], [[COPY]], $xzr - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load (s64) from %ir.addr) - ; CHECK: $d2 = COPY [[LDRDroX]] - ; CHECK: RET_ReallyLR implicit $d2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 16 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[SUBREG_TO_REG]], [[COPY]], $xzr + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: $d2 = COPY [[LDRDroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $d2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 16 %2:gpr(s64) = G_MUL %1, %0(s64) @@ -336,6 +350,9 @@ body: | ... --- +# Show that we can still fall back to the register-register addressing +# mode when we fail to pull in the shift. + name: more_than_one_use_shl_1 alignment: 4 legalized: true @@ -344,19 +361,18 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that we can still fall back to the register-register addressing - ; mode when we fail to pull in the shift. 
liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: more_than_one_use_shl_1 ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[UBFMXri]], 0, 0 :: (load (s64) from %ir.addr) - ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 - ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]] - ; CHECK: $x2 = COPY [[ADDXrr]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[UBFMXri]], 0, 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]] + ; CHECK-NEXT: $x2 = COPY [[ADDXrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -370,6 +386,9 @@ body: | ... --- +# Show that when the GEP is used outside a memory op, we don't do any +# folding at all. + name: more_than_one_use_shl_2 alignment: 4 legalized: true @@ -378,22 +397,21 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that when the GEP is used outside a memory op, we don't do any - ; folding at all. liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: more_than_one_use_shl_2 ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]] - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr) - ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 - ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]] - ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]] - ; CHECK: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]] - ; CHECK: $x2 = COPY [[ADDXrr2]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[UBFMXri]] + ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[ADDXri]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]] + ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[ADDXrr1]] + ; CHECK-NEXT: $x2 = COPY [[ADDXrr2]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -409,6 +427,9 @@ body: | ... --- +# Show that when we have a fastpath for shift-left, we perform the folding +# if it has more than one use. + name: more_than_one_use_shl_lsl_fast alignment: 4 legalized: true @@ -417,18 +438,17 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that when we have a fastpath for shift-left, we perform the folding - ; if it has more than one use. 
liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: more_than_one_use_shl_lsl_fast ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]] - ; CHECK: $x2 = COPY [[ADDXrr]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]] + ; CHECK-NEXT: $x2 = COPY [[ADDXrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -442,6 +462,9 @@ body: | ... --- +# Show that we don't fold into multiple memory ops when we don't have a +# fastpath for shift-left. + name: more_than_one_use_shl_lsl_slow alignment: 4 legalized: true @@ -450,19 +473,18 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that we don't fold into multiple memory ops when we don't have a - ; fastpath for shift-left. liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: more_than_one_use_shl_lsl_slow ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3 - ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]] - ; CHECK: $x2 = COPY [[ADDXrr]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3 + ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]] + ; CHECK-NEXT: $x2 = COPY [[ADDXrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -476,6 +498,9 @@ body: | ... --- +# Show that when we're optimizing for size, we'll do the folding no matter +# what. + name: more_than_one_use_shl_minsize alignment: 4 legalized: true @@ -484,22 +509,21 @@ tracksRegLiveness: true machineFunctionInfo: {} body: | bb.0: - ; Show that when we're optimizing for size, we'll do the folding no matter - ; what. 
liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: more_than_one_use_shl_minsize ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 - ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]] - ; CHECK: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY2]], [[COPY]], 3 - ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) - ; CHECK: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 - ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]] - ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrs]], [[ADDXrr]] - ; CHECK: $x2 = COPY [[ADDXrr1]] - ; CHECK: RET_ReallyLR implicit $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64common = UBFMXri [[COPY]], 61, 60 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]] + ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64 = ADDXrs [[COPY2]], [[COPY]], 3 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[UBFMXri]], 3, 0 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[ADDXri]] + ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrs]], [[ADDXrr]] + ; CHECK-NEXT: $x2 = COPY [[ADDXrr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = G_CONSTANT i64 3 %2:gpr(s64) = G_SHL %0, %1(s64) @@ -525,11 +549,12 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: ldrwrox ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], [[COPY1]], 0, 0 :: (load (s32) from %ir.addr) - ; CHECK: $w2 = COPY [[LDRWroX]] - ; CHECK: RET_ReallyLR implicit $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], [[COPY1]], 0, 0 :: (load (s32) from %ir.addr) + ; CHECK-NEXT: $w2 = COPY [[LDRWroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $w2 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -549,11 +574,12 @@ body: | liveins: $d0, $x1 ; CHECK-LABEL: name: ldrsrox ; CHECK: liveins: $d0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 0 :: (load (s32) from %ir.addr) - ; CHECK: $s2 = COPY [[LDRSroX]] - ; CHECK: RET_ReallyLR implicit $h2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 0 :: (load (s32) from %ir.addr) + ; CHECK-NEXT: $s2 = COPY [[LDRSroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $h2 %0:gpr(p0) = COPY $d0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -573,11 +599,12 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: ldrhrox ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRHroX:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], [[COPY1]], 0, 0 :: (load (s16) from %ir.addr) - ; CHECK: $h2 = COPY [[LDRHroX]] - ; CHECK: RET_ReallyLR implicit $h2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRHroX:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], [[COPY1]], 0, 0 :: (load (s16) from %ir.addr) + ; CHECK-NEXT: $h2 = COPY [[LDRHroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $h2 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -597,11 +624,12 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: ldbbrox ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRBBroX:%[0-9]+]]:gpr32 = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load (s8) from %ir.addr) - ; CHECK: $w2 = COPY [[LDRBBroX]] - ; CHECK: RET_ReallyLR implicit $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRBBroX:%[0-9]+]]:gpr32 = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load (s8) from %ir.addr) + ; CHECK-NEXT: $w2 = COPY [[LDRBBroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $w2 %0:gpr(p0) = COPY $x0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 @@ -621,11 +649,12 @@ body: | liveins: $d0, $x1 ; CHECK-LABEL: name: ldrqrox ; CHECK: liveins: $d0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY]], [[COPY1]], 0, 0 :: (load (<2 x s64>) from %ir.addr) - ; CHECK: $q0 = COPY [[LDRQroX]] - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY]], [[COPY1]], 0, 0 :: (load (<2 x s64>) from %ir.addr) + ; CHECK-NEXT: $q0 = COPY [[LDRQroX]] + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:gpr(p0) = COPY $d0 %1:gpr(s64) = COPY $x1 %2:gpr(p0) = G_PTR_ADD %0, %1 diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll index cf9ed4d5f0e16a..573f921e638cf8 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll @@ -20,7 +20,7 @@ entry: define i8 @test2(i32 %a) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #135 +; CHECK-NEXT: mov w8, #135 // =0x87 ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: cmp w8, #1024 ; CHECK-NEXT: cset w0, eq @@ -37,7 +37,7 @@ entry: define i8 @test3(i32 %a) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1024 +; CHECK-NEXT: mov w8, #1024 // =0x400 ; CHECK-NEXT: movk w8, #33, lsl #16 ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: cmp w8, #1024 @@ -84,7 +84,7 @@ entry: define i8 @test6(i64 %a) { ; CHECK-LABEL: test6: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #135 +; CHECK-NEXT: mov w8, #135 // =0x87 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: cmp x8, #1024 ; CHECK-NEXT: cset w0, eq @@ -101,7 +101,7 @@ entry: define i8 @test7(i64 %a) { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1024 +; CHECK-NEXT: mov w8, #1024 // =0x400 ; CHECK-NEXT: movk w8, #33, lsl #16 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: cmp x8, #1024 @@ -175,7 +175,7 @@ define i32 @test9(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-NEXT: cmp w2, #1 ; CHECK-NEXT: b.lt .LBB8_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: mov w9, #1024 +; CHECK-NEXT: mov w9, #1024 // =0x400 ; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: 
movk w9, #32, lsl #16 ; CHECK-NEXT: .LBB8_2: // %for.body @@ -226,7 +226,7 @@ define void @test10(ptr nocapture %x, ptr nocapture readonly %y, ptr nocapture % ; CHECK-LABEL: test10: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr w8, [x1] -; CHECK-NEXT: mov w9, #1024 +; CHECK-NEXT: mov w9, #1024 // =0x400 ; CHECK-NEXT: movk w9, #32, lsl #16 ; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: str w8, [x0] @@ -253,7 +253,7 @@ entry: define i8 @test11(i64 %a) { ; CHECK-LABEL: test11: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1610612736 +; CHECK-NEXT: mov w8, #-1610612736 // =0xa0000000 ; CHECK-NEXT: and x8, x0, x8 ; CHECK-NEXT: cmp x8, #1024 ; CHECK-NEXT: cset w0, eq diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll index c9fd2d38e27acd..089e171e5a4a79 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll @@ -518,6 +518,4 @@ attributes #5 = { nobuiltin } !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} !3 = !{!"short", !1} -!4 = !{i64 0, i64 4, !5, i64 4, i64 2, !6, i64 8, i64 4, !5, i64 12, i64 2, !6, i64 16, i64 4, !5, i64 20, i64 2, !6} -!5 = !{!0, !0, i64 0} -!6 = !{!3, !3, i64 0} +!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3} diff --git a/llvm/test/CodeGen/AArch64/pr86717.ll b/llvm/test/CodeGen/AArch64/pr86717.ll new file mode 100644 index 00000000000000..aa8be954be72d0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr86717.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s + +define <16 x i8> @f(i32 %0) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub w8, w8, w0 +; CHECK-NEXT: bfxil x9, x8, #0, #4 +; CHECK-NEXT: mov w8, #3 // =0x3 +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: strb w8, [x9] +; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: ret + %2 = sub nuw i32 1, %0 + %3 = insertelement <16 x i8> zeroinitializer, i8 3, i32 %2 + ret <16 x i8> %3 +} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index d9001656f308e1..2ad28b8dd6ecf5 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -10668,3 +10668,111 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x store <2 x i64> %r, ptr addrspace(1) %out ret void } + +define <2 x i32> @v_sdiv_i32_exact(<2 x i32> %num) { +; CHECK-LABEL: @v_sdiv_i32_exact( +; CHECK: %1 = extractelement <2 x i32> %num, i64 0 +; CHECK-NEXT: %2 = sdiv exact i32 %1, 4096 +; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0 +; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1 +; CHECK-NEXT: %5 = sdiv exact i32 %4, 1024 +; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1 +; CHECK-NEXT: ret <2 x i32> %6 +; +; GFX6-LABEL: v_sdiv_i32_exact: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ashrrev_i32_e32 v0, 12, v0 +; GFX6-NEXT: v_ashrrev_i32_e32 v1, 10, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sdiv_i32_exact: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ashrrev_i32_e32 v0, 12, v0 
+; GFX9-NEXT: v_ashrrev_i32_e32 v1, 10, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %result = sdiv exact <2 x i32> %num, <i32 4096, i32 1024> + ret <2 x i32> %result +} + +define <2 x i64> @v_sdiv_i64_exact(<2 x i64> %num) { +; CHECK-LABEL: @v_sdiv_i64_exact( +; CHECK: %1 = extractelement <2 x i64> %num, i64 0 +; CHECK-NEXT: %2 = sdiv exact i64 %1, 4096 +; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0 +; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1 +; CHECK-NEXT: %5 = sdiv exact i64 %4, 1024 +; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1 +; CHECK-NEXT: ret <2 x i64> %6 +; +; GFX6-LABEL: v_sdiv_i64_exact: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 12 +; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 10 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sdiv_i64_exact: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_ashrrev_i64 v[0:1], 12, v[0:1] +; GFX9-NEXT: v_ashrrev_i64 v[2:3], 10, v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] + %result = sdiv exact <2 x i64> %num, <i64 4096, i64 1024> + ret <2 x i64> %result +} + +define <2 x i32> @v_udiv_i32_exact(<2 x i32> %num) { +; CHECK-LABEL: @v_udiv_i32_exact( +; CHECK: %1 = extractelement <2 x i32> %num, i64 0 +; CHECK-NEXT: %2 = udiv exact i32 %1, 4096 +; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0 +; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1 +; CHECK-NEXT: %5 = udiv exact i32 %4, 1024 +; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1 +; CHECK-NEXT: ret <2 x i32> %6 +; +; GFX6-LABEL: v_udiv_i32_exact: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshrrev_b32_e32 v0, 12, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 10, v1 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_udiv_i32_exact: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 12, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 10, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %result = udiv exact <2 x i32> %num, <i32 4096, i32 1024> + ret <2 x i32> %result +} + +define <2 x i64> @v_udiv_i64_exact(<2 x i64> %num) { +; CHECK-LABEL: @v_udiv_i64_exact( +; CHECK: %1 = extractelement <2 x i64> %num, i64 0 +; CHECK-NEXT: %2 = udiv exact i64 %1, 4096 +; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0 +; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1 +; CHECK-NEXT: %5 = udiv exact i64 %4, 1024 +; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1 +; CHECK-NEXT: ret <2 x i64> %6 +; +; GFX6-LABEL: v_udiv_i64_exact: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 12 +; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 10 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_udiv_i64_exact: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b64 v[0:1], 12, v[0:1] +; GFX9-NEXT: v_lshrrev_b64 v[2:3], 10, v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] + %result = udiv exact <2 x i64> %num, <i64 4096, i64 1024> + ret <2 x i64> %result +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll index 25106b456d2f7a..6629d34405492c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll @@ -123,9 +123,10 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ;
CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 -; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 0 +; CHECK-NEXT: st.b $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -149,9 +150,10 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 -; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 1 +; CHECK-NEXT: st.h $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -175,9 +177,10 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 -; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 2 +; CHECK-NEXT: st.w $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -201,9 +204,10 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 -; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 4, 3 +; CHECK-NEXT: st.d $a2, $a3, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -227,9 +231,10 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr1, $a0, 0 ; CHECK-NEXT: xvst $xr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 4, 2 +; CHECK-NEXT: fst.s $fa0, $a2, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -253,9 +258,10 @@ define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounw ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr1, $a0, 0 ; CHECK-NEXT: xvst $xr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -; CHECK-NEXT: fst.d $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 4, 3 +; CHECK-NEXT: fst.d $fa0, $a2, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll index 7f232073ae129c..19171b7d8ed784 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll @@ 
-87,9 +87,10 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 -; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 0 +; CHECK-NEXT: st.b $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -106,9 +107,10 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 -; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 1 +; CHECK-NEXT: st.h $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -125,9 +127,10 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 -; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 2 +; CHECK-NEXT: st.w $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -144,9 +147,10 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 -; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: bstrins.d $a3, $a0, 3, 3 +; CHECK-NEXT: st.d $a2, $a3, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -163,9 +167,10 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr1, $a0, 0 ; CHECK-NEXT: vst $vr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 -; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 3, 2 +; CHECK-NEXT: fst.s $fa0, $a2, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -182,9 +187,10 @@ define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) noun ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr1, $a0, 0 ; CHECK-NEXT: vst $vr1, $sp, 0 -; CHECK-NEXT: addi.d $a0, $sp, 0 -; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 -; CHECK-NEXT: fst.d $fa0, $a0, 0 +; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 +; CHECK-NEXT: addi.d $a2, $sp, 0 +; CHECK-NEXT: bstrins.d $a2, $a0, 3, 3 +; CHECK-NEXT: fst.d $fa0, $a2, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index f810f51f6bc07a..d9d83633a8537f 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -1282,6 +1282,96 @@ define zeroext i32 
@sext_ashr_zext_i8(i8 %a) nounwind { ret i32 %1 } +define i64 @sh6_sh3_add1(i64 noundef %x, i64 noundef %y, i64 noundef %z) { +; RV64I-LABEL: sh6_sh3_add1: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh6_sh3_add1: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh3add a1, a1, a2 +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: ret +entry: + %shl = shl i64 %z, 3 + %shl1 = shl i64 %y, 6 + %add = add nsw i64 %shl1, %shl + %add2 = add nsw i64 %add, %x + ret i64 %add2 +} + +define i64 @sh6_sh3_add2(i64 noundef %x, i64 noundef %y, i64 noundef %z) { +; RV64I-LABEL: sh6_sh3_add2: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh6_sh3_add2: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: slli a1, a1, 6 +; RV64ZBA-NEXT: add a0, a1, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ret +entry: + %shl = shl i64 %z, 3 + %shl1 = shl i64 %y, 6 + %add = add nsw i64 %shl1, %x + %add2 = add nsw i64 %add, %shl + ret i64 %add2 +} + +define i64 @sh6_sh3_add3(i64 noundef %x, i64 noundef %y, i64 noundef %z) { +; RV64I-LABEL: sh6_sh3_add3: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh6_sh3_add3: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh3add a1, a1, a2 +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: ret +entry: + %shl = shl i64 %z, 3 + %shl1 = shl i64 %y, 6 + %add = add nsw i64 %shl1, %shl + %add2 = add nsw i64 %x, %add + ret i64 %add2 +} + +define i64 @sh6_sh3_add4(i64 noundef %x, i64 noundef %y, i64 noundef %z) { +; RV64I-LABEL: sh6_sh3_add4: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh6_sh3_add4: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: slli a1, a1, 6 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %shl = shl i64 %z, 3 + %shl1 = shl i64 %y, 6 + %add = add nsw i64 %x, %shl + %add2 = add nsw i64 %add, %shl1 + ret i64 %add2 +} + ; Make sure we use sext.h+slli+srli for Zba+Zbb. ; FIXME: The RV64I and Zba only cases can be done with only 3 shifts. define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind { diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj.ll index d88f42a4dc5847..32942cd92e684f 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj.ll @@ -22,8 +22,10 @@ entry: to label %try.cont unwind label %lpad ; CHECK: entry.split.split: -; CHECK: %__threwValue.val = load i32, ptr @__threwValue -; CHECK-NEXT: %[[CMP:.*]] = icmp ne i32 %__THREW__.val, 0 +; CHECK: %[[CMP0:.*]] = icmp ne i32 %__THREW__.val, 0 +; CHECK-NEXT: %__threwValue.val = load i32, ptr @__threwValue +; CHECK-NEXT: %[[CMP1:.*]] = icmp ne i32 %__threwValue.val, 0 +; CHECK-NEXT: %[[CMP:.*]] = and i1 %[[CMP0]], %[[CMP1]] ; CHECK-NEXT: br i1 %[[CMP]], label %if.then1, label %if.else1 ; This is exception checking part. 
%if.else1 leads here @@ -119,7 +121,6 @@ if.end: ; preds = %entry ; CHECK-NEXT: unreachable ; CHECK: normal: -; CHECK-NEXT: %__threwValue.val = load i32, ptr @__threwValue, align 4 ; CHECK-NEXT: icmp ne i32 %__THREW__.val, 0 return: ; preds = %if.end, %entry diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index dca4c59d7c8740..27ec95a2c462ab 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -37,8 +37,10 @@ entry: ; CHECK-NEXT: call cc{{.*}} void @__invoke_void_[[PTR]]_i32(ptr @emscripten_longjmp, [[PTR]] %[[JMPBUF]], i32 1) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load [[PTR]], ptr @__THREW__ ; CHECK-NEXT: store [[PTR]] 0, ptr @__THREW__ +; CHECK-NEXT: %[[CMP0:.*]] = icmp ne [[PTR]] %__THREW__.val, 0 ; CHECK-NEXT: %[[THREWVALUE_VAL:.*]] = load i32, ptr @__threwValue -; CHECK-NEXT: %[[CMP:.*]] = icmp ne [[PTR]] %__THREW__.val, 0 +; CHECK-NEXT: %[[CMP1:.*]] = icmp ne i32 %[[THREWVALUE_VAL]], 0 +; CHECK-NEXT: %[[CMP:.*]] = and i1 %[[CMP0]], %[[CMP1]] ; CHECK-NEXT: br i1 %[[CMP]], label %if.then1, label %if.else1 ; CHECK: entry.split.split.split: diff --git a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll index 535450a52ff60e..695a2d0cd806e0 100644 --- a/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll +++ b/llvm/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll @@ -9,11 +9,11 @@ define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind read ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $32, %esp -; X86-NEXT: movzwl 8(%ebp), %eax -; X86-NEXT: movl 12(%ebp), %ecx -; X86-NEXT: andl $7, %ecx +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movzwl 8(%ebp), %ecx +; X86-NEXT: andl $7, %eax ; X86-NEXT: movaps %xmm0, (%esp) -; X86-NEXT: movw %ax, (%esp,%ecx,2) +; X86-NEXT: movw %cx, (%esp,%eax,2) ; X86-NEXT: movaps (%esp), %xmm0 ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index f73288dc58de31..37583f2ba07b4f 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -1009,18 +1009,19 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind { ; X86AVX2-NEXT: pushl %esi ; X86AVX2-NEXT: andl $-16, %esp ; X86AVX2-NEXT: subl $48, %esp -; X86AVX2-NEXT: movl 8(%ebp), %eax -; X86AVX2-NEXT: movl 12(%ebp), %ecx -; X86AVX2-NEXT: movl 16(%ebp), %edx +; X86AVX2-NEXT: movl 8(%ebp), %edx +; X86AVX2-NEXT: movl 12(%ebp), %eax +; X86AVX2-NEXT: movl 16(%ebp), %ecx ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) -; X86AVX2-NEXT: leal (%edx,%edx), %esi +; X86AVX2-NEXT: addl %ecx, %ecx +; X86AVX2-NEXT: movl %ecx, %esi ; X86AVX2-NEXT: andl $3, %esi -; X86AVX2-NEXT: movl %eax, (%esp,%esi,4) +; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%edx,%edx), %eax -; X86AVX2-NEXT: andl $3, %eax -; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) +; X86AVX2-NEXT: incl %ecx +; X86AVX2-NEXT: andl $3, %ecx +; X86AVX2-NEXT: movl %eax, 16(%esp,%ecx,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -1362,12 +1363,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, i64* %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl (%ecx), %edx ; X86AVX2-NEXT: movl 4(%ecx), %ecx ; 
X86AVX2-NEXT: vmovaps %xmm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi +; X86AVX2-NEXT: addl %eax, %eax +; X86AVX2-NEXT: movl %eax, %esi ; X86AVX2-NEXT: andl $3, %esi ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax +; X86AVX2-NEXT: incl %eax ; X86AVX2-NEXT: andl $3, %eax ; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 @@ -1742,18 +1744,19 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { ; X86AVX2-NEXT: pushl %esi ; X86AVX2-NEXT: andl $-32, %esp ; X86AVX2-NEXT: subl $96, %esp -; X86AVX2-NEXT: movl 8(%ebp), %eax -; X86AVX2-NEXT: movl 12(%ebp), %ecx -; X86AVX2-NEXT: movl 16(%ebp), %edx +; X86AVX2-NEXT: movl 8(%ebp), %edx +; X86AVX2-NEXT: movl 12(%ebp), %eax +; X86AVX2-NEXT: movl 16(%ebp), %ecx ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) -; X86AVX2-NEXT: leal (%edx,%edx), %esi +; X86AVX2-NEXT: addl %ecx, %ecx +; X86AVX2-NEXT: movl %ecx, %esi ; X86AVX2-NEXT: andl $7, %esi -; X86AVX2-NEXT: movl %eax, (%esp,%esi,4) +; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%edx,%edx), %eax -; X86AVX2-NEXT: andl $7, %eax -; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) +; X86AVX2-NEXT: incl %ecx +; X86AVX2-NEXT: andl $7, %ecx +; X86AVX2-NEXT: movl %eax, 32(%esp,%ecx,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -2128,12 +2131,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, i64* %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl (%ecx), %edx ; X86AVX2-NEXT: movl 4(%ecx), %ecx ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi +; X86AVX2-NEXT: addl %eax, %eax +; X86AVX2-NEXT: movl %eax, %esi ; X86AVX2-NEXT: andl $7, %esi ; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax +; X86AVX2-NEXT: incl %eax ; X86AVX2-NEXT: andl $7, %eax ; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll b/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll index 84f57f80ab339e..f9040afd1c0166 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --global-value-regex "x" --version 4 -; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64 -hwasan-globals=0 | FileCheck %s --check-prefixes=NOGLOB -; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64 -hwasan-globals=1 | FileCheck %s +; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64-linux-gnu -hwasan-globals=0 | FileCheck %s --check-prefixes=NOGLOB +; RUN: opt < %s -S -passes=hwasan -mtriple=aarch64-linux-gnu -hwasan-globals=1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @x = dso_local global i32 0, align 4 @@ -13,29 +15,13 @@ define dso_local noundef i32 @_Z3tmpv() sanitize_hwaddress { ; NOGLOB-LABEL: define dso_local noundef i32 @_Z3tmpv( ; NOGLOB-SAME: ) #[[ATTR0:[0-9]+]] { ; NOGLOB-NEXT: entry: -; NOGLOB-NEXT: [[TMP12:%.*]] = load i64, ptr @__hwasan_tls, align 4 -; NOGLOB-NEXT: [[TMP1:%.*]] = or i64 [[TMP12]], 4294967295 -; 
NOGLOB-NEXT: [[HWASAN_SHADOW:%.*]] = add i64 [[TMP1]], 1 -; NOGLOB-NEXT: [[TMP2:%.*]] = inttoptr i64 [[HWASAN_SHADOW]] to ptr -; NOGLOB-NEXT: [[TMP3:%.*]] = lshr i64 ptrtoint (ptr @x to i64), 56 -; NOGLOB-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i8 -; NOGLOB-NEXT: [[TMP5:%.*]] = and i64 ptrtoint (ptr @x to i64), 72057594037927935 -; NOGLOB-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4 -; NOGLOB-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP6]] -; NOGLOB-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1 -; NOGLOB-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP4]], [[TMP8]] -; NOGLOB-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1:![0-9]+]] -; NOGLOB: 10: -; NOGLOB-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP2]], ptr @x, i32 2) -; NOGLOB-NEXT: br label [[TMP11]] -; NOGLOB: 11: ; NOGLOB-NEXT: [[TMP0:%.*]] = load i32, ptr @x, align 4 ; NOGLOB-NEXT: ret i32 [[TMP0]] ; ; CHECK-LABEL: define dso_local noundef i32 @_Z3tmpv( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__hwasan_tls, align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__hwasan_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[TMP12]], 4294967295 ; CHECK-NEXT: [[HWASAN_SHADOW:%.*]] = add i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[HWASAN_SHADOW]] to ptr diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll index 079d7224128301..62fd7a16715693 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll @@ -54,7 +54,6 @@ define dso_local noundef i1 @_Z6targetv() sanitize_hwaddress { ; CHECK: sw.bb1: ; CHECK-NEXT: br label [[RETURN]] ; CHECK: while.body: -; CHECK-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP16]], ptr @stackbuf, i32 19) ; CHECK-NEXT: store ptr [[BUF_HWASAN]], ptr @stackbuf, align 8 ; CHECK-NEXT: call void @may_jump() ; CHECK-NEXT: br label [[RETURN]] diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index 2f264a2432fc3d..50b0e7a0f5471b 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -141,4 +141,4 @@ attributes #1 = { argmemonly nounwind } !5 = distinct !{!5, !"some domain"} !6 = !{!7} !7 = distinct !{!7, !5, !"some scope 2"} -!8 = !{i64 0, i64 8, !0} +!8 = !{i64 0, i64 8, null} diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll index d079c03f1dcb93..996d2c0e67e165 100644 --- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll +++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll @@ -75,7 +75,7 @@ entry: !1 = !{!"omnipotent char", !0} !2 = !{!5, !5, i64 0} !3 = !{i64 0, i64 4, !2} -!4 = !{i64 0, i64 8, !2} +!4 = !{i64 0, i64 8, null} !5 = !{!"float", !0} !6 = !{i64 0, i64 4, !2, i64 4, i64 4, !2} !7 = !{i64 0, i64 2, !2, i64 4, i64 6, !2} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll new file mode 100644 index 00000000000000..7771e8369b6198 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions 
have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=riscv64-unknown-linux-gnu -mattr="+v" --passes=slp-vectorizer < %s | FileCheck %s + +define i32 @test(ptr %0, ptr %1) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> <i8 1, i8 1, i8 1, i8 1>) +; CHECK-NEXT: [[TMP3:%.*]] = sext i8 [[TMP2]] to i32 +; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]] +; CHECK-NEXT: ret i32 [[OP_RDX]] +; +entry: + %zext.0 = zext i8 1 to i32 + %zext.1 = zext i8 1 to i32 + %zext.2 = zext i8 1 to i32 + %zext.3 = zext i8 1 to i32 + %select.zext.0 = select i1 false, i32 -1, i32 %zext.0 + %select.zext.1 = select i1 false, i32 0, i32 %zext.1 + %select.zext.2 = select i1 false, i32 0, i32 %zext.2 + %select.zext.3 = select i1 false, i32 0, i32 %zext.3 + + %load.5 = load i32, ptr %1, align 4 + + %and.0 = and i32 %load.5, %select.zext.0 + %and.1 = and i32 %and.0, %select.zext.1 + %and.2 = and i32 %and.1, %select.zext.2 + %and.3 = and i32 %and.2, %select.zext.3 + + ret i32 %and.3 +} + diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll index cfe3ca9f8f9e5f..7b4e2b0ce9112e 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll @@ -11,9 +11,8 @@ define void @test(ptr %a, i8 %0, i16 %b.promoted.i) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16> ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1> -; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP9]] to i64 ; CHECK-NEXT: [[OP_RDX:%.*]] = and i64 [[TMP11]], 1 ; CHECK-NEXT: store i64 [[OP_RDX]], ptr [[A]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 61034de81e4b27..0fcd787fef9769 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -539,7 +539,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !6 = !{!5, !5, i64 0} !7 = !{i64 0, i64 8, !6, i64 8, i64 4, !1} !8 = !{i64 0, i64 4, !1, i64 4, i64 8, !6} -!9 = !{i64 0, i64 8, !6, i64 8, i64 8, !1} +!9 = !{i64 0, i64 8, !6, i64 4, i64 8, !1} !10 = !{i64 0, i64 2, !1, i64 2, i64 2, !1} !11 = !{i64 0, i64 1, !1, i64 1, i64 3, !1} !12 = !{i64 0, i64 2, !1, i64 2, i64 6, !1} diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof new file mode 100644 index 00000000000000..76a8fc9d19a85d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof @@ -0,0 +1,16 @@ +main:252:0 + 1: 0 + 2: 50 + 5: 50 + 7: bar:102 + 1: 51 + 2: baz:51 + 1: 51 + !CFGChecksum: 4294967295 + !Attributes: 3 + !CFGChecksum: 281479271677951 + 
!Attributes: 2 + !CFGChecksum: 281582081721716 +bar:1:1 + 1: 1 + !CFGChecksum: 281479271677951 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll new file mode 100644 index 00000000000000..df56b55dcdf3c0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll @@ -0,0 +1,67 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof -pass-remarks=inline -S -o %t 2>&1 | FileCheck %s --check-prefix=INLINE +; RUN: FileCheck %s < %t +; RUN: FileCheck %s < %t --check-prefix=MERGE + + +; Make sure bar is inlined into main for attr merging verification. +; INLINE: 'bar' inlined into 'main' + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @baz() #0 { +entry: + ret i32 0 +} + +define i32 @bar() #0 !dbg !11 { +; CHECK: define {{.*}} @bar() {{.*}} #[[#BAR_ATTR:]] ! +entry: + %call = call i32 @baz() + ret i32 0 +} + +define i32 @main() #0 { +; MERGE: define {{.*}} @main() {{.*}} #[[#MAIN_ATTR:]] ! +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %call = call i32 @bar(), !dbg !14 + br label %for.cond +} + +; CHECK: attributes #[[#BAR_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}} + +; Verify the attribute is not merged into the caller. +; MERGE-NOT: attributes #[[#MAIN_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}} + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7} +!llvm.pseudo_probe_desc = !{!8, !9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "0df0c950a93a603a7d13f0a9d4623642") +!2 = !{!3} +!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression()) +!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true) +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !{i64 7546896869197086323, i64 4294967295, !"baz"} +!9 = !{i64 -2012135647395072713, i64 281530612780802, !"bar"} +!10 = !{i64 -2624081020897602054, i64 281582081721716, !"main"} +!11 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !12, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!12 = distinct !DISubroutineType(types: !13) +!13 = !{} +!14 = !DILocation(line: 15, column: 10, scope: !15) +!15 = !DILexicalBlockFile(scope: !16, file: !1, discriminator: 186646591) +!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 14, column: 40) +!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 14, column: 3) +!18 = distinct !DILexicalBlock(scope: !19, file: !1, line: 14, column: 3) +!19 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 12, type: !20, scopeLine: 13, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: 
!13) +!20 = !DISubroutineType(types: !13) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll new file mode 100644 index 00000000000000..e00b737cae4e85 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll @@ -0,0 +1,63 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl -pass-remarks=inline 2>&1 | FileCheck %s + + +; CHECK: Run stale profile matching for bar +; CHECK: Callsite with callee:baz is matched from 4 to 2 +; CHECK: 'baz' inlined into 'main' to match profiling context with (cost=always): preinliner at callsite bar:3:8.4 @ main:3:10.7 + +; CHECK: Probe descriptor missing for Function bar +; CHECK: Profile is invalid due to CFG mismatch for Function bar + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { + %1 = call i32 @bar(), !dbg !13 + ret i32 0 +} + +define available_externally i32 @bar() #1 !dbg !21 { + %1 = call i32 @baz(), !dbg !23 + ret i32 0 +} + +define available_externally i32 @baz() #0 !dbg !25 { + ret i32 0 +} + +attributes #0 = { "use-sample-profile" } +attributes #1 = { "profile-checksum-mismatch" "use-sample-profile" } + +!llvm.dbg.cu = !{!0, !7, !9} +!llvm.module.flags = !{!11} +!llvm.pseudo_probe_desc = !{!12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "7220f1a2d70ff869f1a6ab7958e3c393") +!2 = !{!3} +!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression()) +!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true) +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!8 = !DIFile(filename: "test1.v1.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "76696bd6bfe16a9f227fe03cfdb6a82c") +!9 = distinct !DICompileUnit(language: DW_LANG_C11, file: !10, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!10 = !DIFile(filename: "test2.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "553093afc026f9c73562eb3b0c5b7532") +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i64 -2624081020897602054, i64 281582081721716, !"main"} +!13 = !DILocation(line: 8, column: 10, scope: !14) +!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 186646591) +!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 7, column: 40) +!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 7, column: 3) +!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 7, column: 3) +!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !19, scopeLine: 6, flags: 
DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20) +!19 = distinct !DISubroutineType(types: !20) +!20 = !{} +!21 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 3, type: !22, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !20) +!22 = !DISubroutineType(types: !20) +!23 = !DILocation(line: 6, column: 8, scope: !24) +!24 = !DILexicalBlockFile(scope: !21, file: !8, discriminator: 186646567) +!25 = distinct !DISubprogram(name: "baz", scope: !10, file: !10, line: 1, type: !22, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !20) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll index 55225b415d4abc..270beee4ebc2bd 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll @@ -106,7 +106,7 @@ define available_externally dso_local i32 @bar(i32 noundef %0) local_unnamed_add ret i32 %2, !dbg !132 } -attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" "profile-checksum-mismatch"} attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll index 89477ea5fecf1e..29877fb22a2c2e 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll @@ -48,6 +48,8 @@ ; } ; } +; Verify not running profile matching for checksum matched function. 
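+; (bar is checksum-matched, so no stale matching is expected for it; the
+; checks below still expect matching to run for main.)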
+; CHECK-NOT: Run stale profile matching for bar
 ; CHECK: Run stale profile matching for main
diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
index 73ae66dd76c66e..bbcdcb6f586742 100644
--- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
+++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll
@@ -836,6 +836,5 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
 !2 = !{ !"set2", !0 }
 !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
 !4 = !{ float 4.0 }
-!5 = !{ i64 0, i64 8, !6 }
-!6 = !{ !1, !1, i64 0 }
+!5 = !{ i64 0, i64 8, null }
 !13 = distinct !{}
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index 87a70ccd3fc7c5..db7c5f535f7e9d 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -870,6 +870,5 @@ define <2 x float> @f25(<2 x float> %src) {
 !2 = !{ !"set2", !0 }
 !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
 !4 = !{ float 4.0 }
-!5 = !{ i64 0, i64 8, !6 }
-!6 = !{ !1, !1, i64 0 }
+!5 = !{ i64 0, i64 8, null }
 !13 = distinct !{}
diff --git a/llvm/test/Verifier/tbaa-struct.ll b/llvm/test/Verifier/tbaa-struct.ll
index 14c19a19d5ae89..b8ddc7cee496a9 100644
--- a/llvm/test/Verifier/tbaa-struct.ll
+++ b/llvm/test/Verifier/tbaa-struct.ll
@@ -1,36 +1,28 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; RUN: llvm-as < %s 2>&1
+
+; FIXME: The verifier should reject the invalid !tbaa.struct nodes below.
 
 define void @test_overlapping_regions(ptr %a1) {
-; CHECK: Overlapping tbaa.struct regions
-; CHECK-NEXT: %ld = load i8, ptr %a1, align 1, !tbaa.struct !0
   %ld = load i8, ptr %a1, align 1, !tbaa.struct !0
   ret void
 }
 
 define void @test_size_not_integer(ptr %a1) {
-; CHECK: Size must be a constant integer
-; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !5
   store i8 1, ptr %a1, align 1, !tbaa.struct !5
   ret void
 }
 
 define void @test_offset_not_integer(ptr %a1, ptr %a2) {
-; CHECK: Offset must be a constant integer
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6
   tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6
   ret void
 }
 
 define void @test_tbaa_missing(ptr %a1, ptr %a2) {
-; CHECK: TBAA tag missing
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7
   tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7
   ret void
 }
 
 define void @test_tbaa_invalid(ptr %a1) {
-; CHECK: Old-style TBAA is no longer allowed, use struct-path TBAA instead
-; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !8
   store i8 1, ptr %a1, align 1, !tbaa.struct !8
   ret void
 }
diff --git a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
index 1fd81bd407becb..0a947f6e206fef 100644
--- a/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SubprocessMemory.cpp
@@ -63,6 +63,10 @@ Error SubprocessMemory::addMemoryDefinition(
     SharedMemoryNames.push_back(SharedMemoryName);
     int SharedMemoryFD =
         shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+    if (SharedMemoryFD == -1)
+      return make_error<Failure>(
+          "Failed to create shared memory object for memory definition: " +
+          Twine(strerror(errno)));
     if (ftruncate(SharedMemoryFD, MemVal.SizeBytes) != 0) {
       return make_error<Failure>("Truncating a memory definiton failed: " +
                                  Twine(strerror(errno)));
@@ -100,7 +104,8 @@ Expected<int> SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
       shm_open(AuxiliaryMemoryName.c_str(), O_RDWR, S_IRUSR | S_IWUSR);
   if (AuxiliaryMemoryFileDescriptor == -1)
     return make_error<Failure>(
-        "Getting file descriptor for auxiliary memory failed");
+        "Getting file descriptor for auxiliary memory failed: " +
+        Twine(strerror(errno)));
   // set up memory value file descriptors
   int *AuxiliaryMemoryMapping =
       (int *)mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 193f95443b16ef..19d42b7688dac8 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1077,30 +1077,25 @@ OperandPredicateMatcher::~OperandPredicateMatcher() {}
 bool OperandPredicateMatcher::isHigherPriorityThan(
     const OperandPredicateMatcher &B) const {
   // Generally speaking, an instruction is more important than an Int or a
-  // LiteralInt because it can cover more nodes but theres an exception to
+  // LiteralInt because it can cover more nodes but there's an exception to
   // this. G_CONSTANT's are less important than either of those two because they
   // are more permissive.
 
-  const InstructionOperandMatcher *AOM =
-      dyn_cast<InstructionOperandMatcher>(this);
-  const InstructionOperandMatcher *BOM =
-      dyn_cast<InstructionOperandMatcher>(&B);
+  const auto *AOM = dyn_cast<InstructionOperandMatcher>(this);
+  const auto *BOM = dyn_cast<InstructionOperandMatcher>(&B);
   bool AIsConstantInsn = AOM && AOM->getInsnMatcher().isConstantInstruction();
   bool BIsConstantInsn = BOM && BOM->getInsnMatcher().isConstantInstruction();
 
-  if (AOM && BOM) {
-    // The relative priorities between a G_CONSTANT and any other instruction
-    // don't actually matter but this code is needed to ensure a strict weak
-    // ordering. This is particularly important on Windows where the rules will
-    // be incorrectly sorted without it.
-    if (AIsConstantInsn != BIsConstantInsn)
-      return AIsConstantInsn < BIsConstantInsn;
-    return false;
-  }
+  // The relative priorities between a G_CONSTANT and any other instruction
+  // don't actually matter but this code is needed to ensure a strict weak
+  // ordering. This is particularly important on Windows where the rules will
+  // be incorrectly sorted without it.
+  if (AOM && BOM)
+    return !AIsConstantInsn && BIsConstantInsn;
 
-  if (AOM && AIsConstantInsn && (B.Kind == OPM_Int || B.Kind == OPM_LiteralInt))
+  if (AIsConstantInsn && (B.Kind == OPM_Int || B.Kind == OPM_LiteralInt))
     return false;
-  if (BOM && BIsConstantInsn && (Kind == OPM_Int || Kind == OPM_LiteralInt))
+  if (BIsConstantInsn && (Kind == OPM_Int || Kind == OPM_LiteralInt))
     return true;
 
   return Kind < B.Kind;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 5ceb85e7d9903b..7fd88dec71d491 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 #include <optional>
 
@@ -552,6 +553,305 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<tensor::PackOp> {
 
   ControlPropagationFn controlFn;
 };
 
+/// Project dimsPos to the inner-most non-unit dim pos with reassocIndices.
+///
+/// For example, given dimsPos [0, 2], reassocIndices [[0, 1], [2, 3]], and
+/// targetShape [16, 16, 32, 1], it returns [1, 2]: for pos 0, the inner-most
+/// projected dim from [0, 1] is 1; for pos 2, the inner-most non-unit
+/// projected dim from [2, 3] is 2.
+///
+/// If all candidates in a reassociation are unit dims, it chooses the
+/// inner-most dim pos.
+static SmallVector<int64_t>
+projectToInnerMostNonUnitDimsPos(ArrayRef<int64_t> dimsPos,
+                                 ArrayRef<ReassociationIndices> reassocIndices,
+                                 ArrayRef<int64_t> targetShape) {
+  SmallVector<int64_t> projectedDimsPos;
+  for (auto pos : dimsPos) {
+    // In the case all dims are unit, this will return the inner-most one.
+    int64_t projectedPos = reassocIndices[pos].back();
+    for (auto i : llvm::reverse(reassocIndices[pos])) {
+      int64_t dim = targetShape[i];
+      if (dim > 1 || ShapedType::isDynamic(dim)) {
+        projectedPos = i;
+        break;
+      }
+    }
+    projectedDimsPos.push_back(projectedPos);
+  }
+  return projectedDimsPos;
+}
+
+/// Check if all dims in dimsPos are divisible by the corresponding tile sizes.
+static bool isDimsDivisibleByTileSizes(ArrayRef<int64_t> dimsPos,
+                                       ArrayRef<int64_t> shape,
+                                       ArrayRef<int64_t> tileSizes) {
+  for (auto [pos, tileSize] : llvm::zip_equal(dimsPos, tileSizes)) {
+    int64_t dim = shape[pos];
+    if (ShapedType::isDynamic(dim) || (dim % tileSize) != 0)
+      return false;
+  }
+  return true;
+}
+
+/// Permute the reassociation indices and reindex them in the sequence order.
+/// Returns the next dim pos in the sequence.
+///
+/// For example, given reassocIndices [[0, 1], [2]] and permutation [1, 0], it
+/// applies the permutation to get [[2], [0, 1]] and reindexes the indices into
+/// [[0], [1, 2]].
+static int64_t applyPermutationAndReindexReassoc(
+    SmallVector<ReassociationIndices> &reassocIndices,
+    ArrayRef<int64_t> permutation) {
+  applyPermutationToVector(reassocIndices, permutation);
+  int64_t nextPos = 0;
+  for (ReassociationIndices &indices : reassocIndices) {
+    for (auto &index : indices) {
+      index = nextPos;
+      nextPos += 1;
+    }
+  }
+  return nextPos;
+}
+
+/// Bubble up pack op through collapse shape op when the packed dims can be
+/// projected to the dims before collapsing. This is possible when the inner
+/// tile sizes can divide the projected dims.
+///
+/// For example:
+///
+///   %collapsed = tensor.collapse_shape %in [[0, 1], [2]]
+///       : tensor into tensor
+///   %pack = tensor.pack %collapsed outer_dims_perm = [0, 1]
+///       inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %empty
+///       : tensor -> tensor
+///
+/// can be transformed into:
+///
+///   %pack = tensor.pack %in outer_dims_perm = [1, 2]
+///       inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %empty
+///       : tensor -> tensor
+///   %collapsed = tensor.collapse_shape %pack [[0, 1], [2], [3], [4]]
+///       : tensor into tensor
+static LogicalResult
+bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
+                                   tensor::PackOp packOp,
+                                   PatternRewriter &rewriter) {
+  SmallVector<int64_t> innerTileSizes = packOp.getStaticTiles();
+  ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
+  ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm();
+
+  ArrayRef<int64_t> srcShape = collapseOp.getSrcType().getShape();
+  SmallVector<ReassociationIndices> reassocIndices =
+      collapseOp.getReassociationIndices();
+  // Project inner tile pos to the dim pos before collapsing. For example, if
+  // dims [x, y] is collapsed into [z], packing on dim z can be projected back
+  // to pack on dim y.
+  //
+  // Project to inner-most non-unit dims to increase the chance that they can
+  // be divided by the inner tile sizes. This is correct because for [..., x, 1],
+  // packing on dim 1 is equivalent to packing on dim x.
+  SmallVector<int64_t> projectedInnerDimsPos =
+      projectToInnerMostNonUnitDimsPos(innerDimsPos, reassocIndices, srcShape);
+
+  if (!isDimsDivisibleByTileSizes(projectedInnerDimsPos, srcShape,
+                                  innerTileSizes)) {
+    return failure();
+  }
+  // Expand the outer dims permutation with the associated source dims for the
+  // new permutation after bubbling. This is because moving a collapsed dim is
+  // equivalent to moving the associated source dims together.
+  SmallVector<int64_t> newOuterDimsPerm;
+  for (auto outerPos : outerDimsPerm) {
+    newOuterDimsPerm.insert(newOuterDimsPerm.end(),
+                            reassocIndices[outerPos].begin(),
+                            reassocIndices[outerPos].end());
+  }
+
+  auto emptyOp = tensor::PackOp::createDestinationTensor(
+      rewriter, packOp.getLoc(), collapseOp.getSrc(), packOp.getMixedTiles(),
+      projectedInnerDimsPos, newOuterDimsPerm);
+  auto newPackOp = rewriter.create<tensor::PackOp>(
+      packOp.getLoc(), collapseOp.getSrc(), emptyOp, projectedInnerDimsPos,
+      packOp.getMixedTiles(), packOp.getPaddingValue(), newOuterDimsPerm);
+
+  SmallVector<ReassociationIndices> newReassocIndices = reassocIndices;
+  // First apply the permutation on the reassociations of the outer dims.
+  // For example given the permutation [1, 0], the reassociations [[0, 1], [2]]
+  // -> [[0], [1, 2]]
+  int64_t nextPos =
+      applyPermutationAndReindexReassoc(newReassocIndices, outerDimsPerm);
+  // Then add direct mapping for the inner tile dims.
+  for (size_t i = 0; i < innerDimsPos.size(); ++i) {
+    newReassocIndices.push_back({nextPos});
+    nextPos += 1;
+  }
+
+  auto newCollapseOp = rewriter.create<tensor::CollapseShapeOp>(
+      collapseOp.getLoc(), packOp.getType(), newPackOp, newReassocIndices);
+  rewriter.replaceOp(packOp, newCollapseOp);
+
+  return success();
+}
+
+class BubbleUpPackOpThroughReshapeOp final
+    : public OpRewritePattern<tensor::PackOp> {
+public:
+  BubbleUpPackOpThroughReshapeOp(MLIRContext *context, ControlPropagationFn fun)
+      : OpRewritePattern<tensor::PackOp>(context), controlFn(std::move(fun)) {}
+
+  LogicalResult matchAndRewrite(tensor::PackOp packOp,
+                                PatternRewriter &rewriter) const override {
+    Operation *srcOp = packOp.getSource().getDefiningOp();
+    // Currently only support when the pack op is the only user.
+    if (!srcOp || !(srcOp->getNumResults() == 1) ||
+        !srcOp->getResult(0).hasOneUse()) {
+      return failure();
+    }
+    // Currently only support static inner tile sizes.
+    if (llvm::any_of(packOp.getStaticTiles(), [](int64_t size) {
+          return ShapedType::isDynamic(size);
+        })) {
+      return failure();
+    }
+
+    // User controlled propagation function.
+    if (!controlFn(srcOp))
+      return failure();
+
+    return TypeSwitch<Operation *, LogicalResult>(srcOp)
+        .Case([&](tensor::CollapseShapeOp op) {
+          return bubbleUpPackOpThroughCollapseShape(op, packOp, rewriter);
+        })
+        .Default([](Operation *) { return failure(); });
+  }
+
+private:
+  ControlPropagationFn controlFn;
+};
+
+/// Push down unpack op through expand shape op when the packed dims can be
+/// projected to the dims after expanding. This is possible when the inner tile
+/// sizes can divide the projected dims.
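+/// (This mirrors bubbleUpPackOpThroughCollapseShape above, with expand_shape
+/// taking the place of collapse_shape and unpack the place of pack.)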
+///
+/// For example:
+///
+///   %unpack = tensor.unpack %in outer_dims_perm = [0, 1]
+///       inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %empty
+///       : tensor -> tensor
+///   %expanded = tensor.expand_shape %unpack [[0, 1], [2]]
+///       : tensor into tensor
+///
+/// can be transformed into:
+///
+///   %expanded = tensor.expand_shape %in [[0, 1], [2], [3], [4]]
+///       : tensor into tensor
+///   %unpack = tensor.unpack %expanded outer_dims_perm = [0, 1, 2]
+///       inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %empty
+///       : tensor -> tensor
+static LogicalResult
+pushDownUnPackOpThroughExpandShape(tensor::UnPackOp unPackOp,
+                                   tensor::ExpandShapeOp expandOp,
+                                   PatternRewriter &rewriter) {
+  SmallVector<int64_t> innerTileSizes = unPackOp.getStaticTiles();
+  ArrayRef<int64_t> innerDimsPos = unPackOp.getInnerDimsPos();
+  ArrayRef<int64_t> outerDimsPerm = unPackOp.getOuterDimsPerm();
+
+  ArrayRef<int64_t> dstShape = expandOp.getType().getShape();
+  SmallVector<ReassociationIndices> reassocIndices =
+      expandOp.getReassociationIndices();
+  // Project inner tile pos to the dim pos after expanding. For example, if
+  // dims [z] is expanded into [x, y], unpacking on dim z can be projected to
+  // unpack on dim y.
+  //
+  // Project to inner-most non-unit dims to increase the chance that they can
+  // be divided by the inner tile sizes. This is correct because for [..., x, 1],
+  // unpacking on dim 1 is equivalent to unpacking on dim x.
+  SmallVector<int64_t> projectedInnerDimsPos =
+      projectToInnerMostNonUnitDimsPos(innerDimsPos, reassocIndices, dstShape);
+
+  if (!isDimsDivisibleByTileSizes(projectedInnerDimsPos, dstShape,
+                                  innerTileSizes)) {
+    return failure();
+  }
+  // Expand the outer dims permutation with the associated expanded dims for
+  // the new permutation after pushing. This is because moving a source dim is
+  // equivalent to moving the associated expanded dims together.
+  SmallVector<int64_t> newOuterDimsPerm;
+  for (auto outerPos : outerDimsPerm) {
+    newOuterDimsPerm.insert(newOuterDimsPerm.end(),
+                            reassocIndices[outerPos].begin(),
+                            reassocIndices[outerPos].end());
+  }
+
+  SmallVector<ReassociationIndices> newReassocIndices = reassocIndices;
+  // First apply the permutation on the reassociations of the outer dims.
+  // For example given the permutation [1, 0], the reassociations [[0, 1], [2]]
+  // -> [[0], [1, 2]]
+  int64_t nextPos =
+      applyPermutationAndReindexReassoc(newReassocIndices, outerDimsPerm);
+  // Then add direct mapping for the inner tile dims.
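+  // For example, with two inner tile dims, reassociations [[0], [1, 2]] grow
+  // to [[0], [1, 2], [3], [4]].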
+  for (size_t i = 0; i < innerDimsPos.size(); ++i) {
+    newReassocIndices.push_back({nextPos});
+    nextPos += 1;
+  }
+
+  RankedTensorType newExpandType =
+      tensor::PackOp::inferPackedType(expandOp.getType(), innerTileSizes,
+                                      projectedInnerDimsPos, newOuterDimsPerm);
+  auto newExpandOp = rewriter.create<tensor::ExpandShapeOp>(
+      expandOp.getLoc(), newExpandType, unPackOp.getSource(),
+      newReassocIndices);
+
+  auto emptyOp = tensor::UnPackOp::createDestinationTensor(
+      rewriter, unPackOp.getLoc(), newExpandOp, unPackOp.getMixedTiles(),
+      projectedInnerDimsPos, newOuterDimsPerm);
+  auto newUnPackOp = rewriter.create<tensor::UnPackOp>(
+      unPackOp.getLoc(), newExpandOp.getResult(), emptyOp,
+      projectedInnerDimsPos, unPackOp.getMixedTiles(), newOuterDimsPerm);
+  rewriter.replaceOp(expandOp, newUnPackOp);
+
+  return success();
+}
+
+class PushDownUnPackOpThroughReshapeOp final
+    : public OpRewritePattern<tensor::UnPackOp> {
+public:
+  PushDownUnPackOpThroughReshapeOp(MLIRContext *context,
+                                   ControlPropagationFn fun)
+      : OpRewritePattern<tensor::UnPackOp>(context), controlFn(std::move(fun)) {
+  }
+
+  LogicalResult matchAndRewrite(tensor::UnPackOp unPackOp,
+                                PatternRewriter &rewriter) const override {
+    Value result = unPackOp.getResult();
+    // Currently only support unpack op with a single user.
+    if (!result.hasOneUse()) {
+      return failure();
+    }
+    // Currently only support static inner tile sizes.
+    if (llvm::any_of(unPackOp.getStaticTiles(), [](int64_t size) {
+          return ShapedType::isDynamic(size);
+        })) {
+      return failure();
+    }
+
+    Operation *consumerOp = *result.user_begin();
+    // User controlled propagation function.
+    if (!controlFn(consumerOp))
+      return failure();
+
+    return TypeSwitch<Operation *, LogicalResult>(consumerOp)
+        .Case([&](tensor::ExpandShapeOp op) {
+          return pushDownUnPackOpThroughExpandShape(unPackOp, op, rewriter);
+        })
+        .Default([](Operation *) { return failure(); });
+  }
+
+private:
+  ControlPropagationFn controlFn;
+};
+
 // TODO: Relax this restriction. We should unpack a generic op also
 // in the presence of multiple unpack ops as producers.
 /// Return the unpacked operand, if present, for the current generic op.
@@ -774,6 +1074,7 @@ void mlir::linalg::populateDataLayoutPropagationPatterns(
     const ControlPropagationFn &controlPackUnPackPropagation) {
   patterns
       .insert<BubbleUpPackOpThroughGenericOpPattern, BubbleUpPackThroughPadOp,
-              PushDownUnPackOpThroughGenericOp, PushDownUnPackThroughPadOp>(
+              BubbleUpPackOpThroughReshapeOp, PushDownUnPackOpThroughGenericOp,
+              PushDownUnPackThroughPadOp, PushDownUnPackOpThroughReshapeOp>(
           patterns.getContext(), controlPackUnPackPropagation);
 }
diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
index 48e4b8cd88b58e..41c619566b55df 100644
--- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
@@ -51,8 +51,20 @@ void stdSort(uint64_t n, V *p) {
 // details of our vectors. Also useful for direct LLVM IR output.
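+// The C standard leaves the sign of a printed NaN implementation-defined, so
+// negative NaNs are special-cased below to keep runner output stable.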
extern "C" void printI64(int64_t i) { fprintf(stdout, "%" PRId64, i); } extern "C" void printU64(uint64_t u) { fprintf(stdout, "%" PRIu64, u); } -extern "C" void printF32(float f) { fprintf(stdout, "%g", f); } -extern "C" void printF64(double d) { fprintf(stdout, "%lg", d); } +extern "C" void printF32(float f) { + if (std::isnan(f) && std::signbit(f)) { + fprintf(stdout, "-nan"); + } else { + fprintf(stdout, "%g", f); + } +} +extern "C" void printF64(double d) { + if (std::isnan(d) && std::signbit(d)) { + fprintf(stdout, "-nan"); + } else { + fprintf(stdout, "%lg", d); + } +} extern "C" void printString(char const *s) { fputs(s, stdout); } extern "C" void printOpen() { fputs("( ", stdout); } extern "C" void printClose() { fputs(" )", stdout); } diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index e036695a2ac9fd..79d61ab757e327 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -905,3 +905,163 @@ func.func @unpack_different_destination_shape(%arg0: tensor<1x1x1080x1920x16xi32 // CHECK-SAME: inner_dims_pos = [0] inner_tiles = [16] // CHECK-SAME: into %[[UNPACK_NEW_DEST]] // CHECK: return %[[UNPACK]] : tensor<16x540x960xi32> + +// ----- + +func.func @bubble_up_pack_through_collapse(%1: tensor, %dim : index) -> tensor { + %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor + %2 = tensor.empty(%dim) : tensor + %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor -> tensor + func.return %pack : tensor +} +// CHECK-LABEL: func.func @bubble_up_pack_through_collapse +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor +// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor +// CHECK: return %[[COLLAPSED]] : tensor + +// ----- + +func.func @bubble_up_permuted_pack_through_collapse(%1: tensor<4x192x16x256xf32>) -> tensor<4x32x3072x8x1xf32> { + %collapsed = tensor.collapse_shape %1 [[0], [1, 2], [3]] : tensor<4x192x16x256xf32> into tensor<4x3072x256xf32> + %2 = tensor.empty() : tensor<4x32x3072x8x1xf32> + %pack = tensor.pack %collapsed outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 1] into %2 : tensor<4x3072x256xf32> -> tensor<4x32x3072x8x1xf32> + func.return %pack : tensor<4x32x3072x8x1xf32> +} +// CHECK-LABEL: func.func @bubble_up_permuted_pack_through_collapse +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x32x192x16x8x1xf32> +// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<4x192x16x256xf32> -> tensor<4x32x192x16x8x1xf32> +// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %pack {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x192x16x8x1xf32> into tensor<4x32x3072x8x1xf32> +// CHECK: return %[[COLLAPSED]] : tensor<4x32x3072x8x1xf32> + +// ----- + +func.func @bubble_up_pack_through_unit_collapse(%1: tensor<1x64x1x4xf32>) -> tensor<8x4x8x1xf32> { + %collapsed = tensor.collapse_shape %1 [[0, 1, 2], 
[3]] : tensor<1x64x1x4xf32> into tensor<64x4xf32> + %2 = tensor.empty() : tensor<8x4x8x1xf32> + %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %2 : tensor<64x4xf32> -> tensor<8x4x8x1xf32> + func.return %pack : tensor<8x4x8x1xf32> +} +// CHECK-LABEL: func.func @bubble_up_pack_through_unit_collapse +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x8x1x4x8x1xf32> +// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 1] into %[[EMPTY]] : tensor<1x64x1x4xf32> -> tensor<1x8x1x4x8x1xf32> +// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<1x8x1x4x8x1xf32> into tensor<8x4x8x1xf32> +// CHECK: return %[[COLLAPSED]] : tensor<8x4x8x1xf32> + +// ----- + +func.func @bubble_up_pack_through_collapse_on_outer_dims(%1: tensor, %dim : index) -> tensor { + %collapsed = tensor.collapse_shape %1 [[0, 1], [2]] : tensor into tensor + %2 = tensor.empty(%dim) : tensor + %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [4] into %2 : tensor -> tensor + func.return %pack : tensor +} +// CHECK-LABEL: func.func @bubble_up_pack_through_collapse_on_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] : tensor +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor +// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [4] into %[[EMPTY]] : tensor -> tensor +// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[PACK]] {{\[}}[0, 1], [2], [3]] : tensor into tensor +// CHECK: return %[[COLLAPSED]] : tensor + +// ----- + +func.func @no_bubble_up_pack_through_non_divisible_collapse(%1: tensor<3072x64x4xf32>) -> tensor<384x32x8x8xf32> { + %collapsed = tensor.collapse_shape %1 [[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32> + %2 = tensor.empty() : tensor<384x32x8x8xf32> + %pack = tensor.pack %collapsed outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %2 : tensor<3072x256xf32> -> tensor<384x32x8x8xf32> + func.return %pack : tensor<384x32x8x8xf32> +} +// CHECK-LABEL: func.func @no_bubble_up_pack_through_non_divisible_collapse +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0], [1, 2]] : tensor<3072x64x4xf32> into tensor<3072x256xf32> +// CHECK: %[[PACK:.+]] = tensor.pack %[[COLLAPSED]] +// CHECK: return %[[PACK]] : tensor<384x32x8x8xf32> + +// ----- + +func.func @push_down_unpack_through_expand(%5: tensor, %dim: index) -> tensor { + %6 = tensor.empty(%dim) : tensor + %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor -> tensor + %expanded = tensor.expand_shape %unpack [[0, 1], [2]] : tensor into tensor + func.return %expanded : tensor +} +// CHECK-LABEL: func.func @push_down_unpack_through_expand +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3], [4]] : tensor into tensor +// CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] 
outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor -> tensor +// CHECK: return %[[UNPACK]] : tensor + +// ----- + +func.func @push_down_permuted_unpack_through_expand(%5: tensor<4x32x384x8x8xf32>) -> tensor<4x12x256x256xf32> { + %6 = tensor.empty() : tensor<4x3072x256xf32> + %unpack = tensor.unpack %5 outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [8, 8] into %6 : tensor<4x32x384x8x8xf32> -> tensor<4x3072x256xf32> + %expanded = tensor.expand_shape %unpack [[0], [1, 2], [3]] : tensor<4x3072x256xf32> into tensor<4x12x256x256xf32> + func.return %expanded : tensor<4x12x256x256xf32> +} +// CHECK-LABEL: @push_down_permuted_unpack_through_expand +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1], [2, 3], [4], [5]] : tensor<4x32x384x8x8xf32> into tensor<4x32x12x32x8x8xf32> +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x12x256x256xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<4x32x12x32x8x8xf32> -> tensor<4x12x256x256xf32> +// CHECK: return %[[UNPACK]] : tensor<4x12x256x256xf32> + +// ----- + +func.func @push_down_unpack_through_unit_expand(%5: tensor<6x32x8x8xf32>) -> tensor<3x16x1x256xf32> { + %6 = tensor.empty() : tensor<48x256xf32> + %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<6x32x8x8xf32> -> tensor<48x256xf32> + %expanded = tensor.expand_shape %unpack [[0, 1, 2], [3]] : tensor<48x256xf32> into tensor<3x16x1x256xf32> + func.return %expanded : tensor<3x16x1x256xf32> +} +// CHECK-LABEL: func.func @push_down_unpack_through_unit_expand +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1, 2], [3], [4], [5]] : tensor<6x32x8x8xf32> into tensor<3x2x1x32x8x8xf32> +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x16x1x256xf32> +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED]] outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [1, 3] inner_tiles = [8, 8] into %[[EMPTY]] : tensor<3x2x1x32x8x8xf32> -> tensor<3x16x1x256xf32> +// CHECK: return %[[UNPACK]] : tensor<3x16x1x256xf32> + +// ----- + +func.func @push_down_unpack_through_expand_on_outer_dims(%5: tensor, %dim: index) -> tensor { + %6 = tensor.empty(%dim) : tensor + %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [1] inner_tiles = [8] into %6 : tensor -> tensor + %expanded = tensor.expand_shape %unpack [[0, 1], [2]] : tensor into tensor + func.return %expanded : tensor +} +// CHECK-LABEL: func.func @push_down_unpack_through_expand_on_outer_dims +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0, 1], [2], [3]] : tensor into tensor +// CHECK: %[[DIM:.+]] = tensor.dim %[[EXPANDED]], %[[C0]] : tensor +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[DIM]]) : tensor +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[EXPANDED:.+]] outer_dims_perm = [0, 1, 2] inner_dims_pos = [2] inner_tiles = [8] into %[[EMPTY]] : tensor -> tensor +// CHECK: return %[[UNPACK]] : tensor + +// ----- + +func.func @no_push_down_unpack_through_non_divisible_expand(%5: tensor<384x32x8x8xf32>) -> tensor<256x12x256xf32> { + %6 = tensor.empty() : tensor<3072x256xf32> + %unpack = tensor.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] 
inner_tiles = [8, 8] into %6 : tensor<384x32x8x8xf32> -> tensor<3072x256xf32> + %expanded = tensor.expand_shape %unpack [[0, 1], [2]] : tensor<3072x256xf32> into tensor<256x12x256xf32> + func.return %expanded : tensor<256x12x256xf32> +} +// CHECK-LABEL: func.func @no_push_down_unpack_through_non_divisible_expand +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK: %[[UNPACK:.+]] = tensor.unpack %[[ARG0]] +// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[UNPACK]] {{\[}}[0, 1], [2]] : tensor<3072x256xf32> into tensor<256x12x256xf32> +// CHECK: return %[[EXPANDED]] : tensor<256x12x256xf32> diff --git a/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir b/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir index e2229a392bbf76..340ef30bf59c29 100644 --- a/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir +++ b/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir @@ -190,6 +190,12 @@ func.func @func_powff64(%a : f64, %b : f64) { return } +func.func @func_powff32(%a : f32, %b : f32) { + %r = math.powf %a, %b : f32 + vector.print %r : f32 + return +} + func.func @powf() { // CHECK-NEXT: 16 %a = arith.constant 4.0 : f64 @@ -230,7 +236,17 @@ func.func @powf() { %j = arith.constant 29385.0 : f64 %j_p = arith.constant 23598.0 : f64 call @func_powff64(%j, %j_p) : (f64, f64) -> () - return + + // CHECK-NEXT: -nan + %k = arith.constant 1.0 : f64 + %k_p = arith.constant 0xfff0000001000000 : f64 + call @func_powff64(%k, %k_p) : (f64, f64) -> () + + // CHECK-NEXT: -nan + %l = arith.constant 1.0 : f32 + %l_p = arith.constant 0xffffffff : f32 + call @func_powff32(%l, %l_p) : (f32, f32) -> () + return } // -------------------------------------------------------------------------- // diff --git a/openmp/libomptarget/plugins-nextgen/host/dynamic_ffi/ffi.cpp b/openmp/libomptarget/plugins-nextgen/host/dynamic_ffi/ffi.cpp index c79daa79858171..c586ad1c1969b3 100644 --- a/openmp/libomptarget/plugins-nextgen/host/dynamic_ffi/ffi.cpp +++ b/openmp/libomptarget/plugins-nextgen/host/dynamic_ffi/ffi.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DynamicLibrary.h" + +#include "Shared/Debug.h" #include #include "DLWrap.h" @@ -37,15 +39,21 @@ uint32_t ffi_init() { std::string ErrMsg; auto DynlibHandle = std::make_unique( llvm::sys::DynamicLibrary::getPermanentLibrary(FFI_PATH, &ErrMsg)); - if (!DynlibHandle->isValid()) + + if (!DynlibHandle->isValid()) { + DP("Unable to load library '%s': %s!\n", FFI_PATH, ErrMsg.c_str()); return DYNAMIC_FFI_FAIL; + } for (size_t I = 0; I < dlwrap::size(); I++) { const char *Sym = dlwrap::symbol(I); void *P = DynlibHandle->getAddressOfSymbol(Sym); - if (P == nullptr) + if (P == nullptr) { + DP("Unable to find '%s' in '%s'!\n", Sym, FFI_PATH); return DYNAMIC_FFI_FAIL; + } + DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P); *dlwrap::pointer(I) = P; } @@ -53,8 +61,10 @@ uint32_t ffi_init() { #define DYNAMIC_INIT(SYMBOL) \ { \ void *SymbolPtr = DynlibHandle->getAddressOfSymbol(#SYMBOL); \ - if (!SymbolPtr) \ + if (!SymbolPtr) { \ + DP("Unable to find '%s' in '%s'!\n", #SYMBOL, FFI_PATH); \ return DYNAMIC_FFI_FAIL; \ + } \ SYMBOL = *reinterpret_cast(SymbolPtr); \ } DYNAMIC_INIT(ffi_type_void); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 7c0b981a35c396..8ba54d44a7d4f8 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -80,9 +80,7 @@ DeviceTy::~DeviceTy() { llvm::Error DeviceTy::init() { // Make call to 
init_requires if it exists for this plugin. int32_t Ret = 0; - if (RTL->init_requires) - Ret = RTL->init_requires(PM->getRequirements()); - + Ret = RTL->init_requires(PM->getRequirements()); if (Ret != OFFLOAD_SUCCESS) return llvm::createStringError( llvm::inconvertibleErrorCode(), @@ -175,11 +173,11 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size, /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) - if (ForceSynchronousTargetRegions || !AsyncInfo || #ifdef OMPT_SUPPORT - ompt::CallbacksInitialized || + if (ForceSynchronousTargetRegions || !AsyncInfo || ompt::CallbacksInitialized) +#else + if (ForceSynchronousTargetRegions || !AsyncInfo) #endif - !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size, AsyncInfo); @@ -207,11 +205,11 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size, /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) - if (ForceSynchronousTargetRegions || !RTL->data_retrieve_async || #ifdef OMPT_SUPPORT - ompt::CallbacksInitialized || + if (ForceSynchronousTargetRegions || ompt::CallbacksInitialized) +#else + if (ForceSynchronousTargetRegions) #endif - !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size, AsyncInfo); @@ -237,11 +235,13 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, .getTraceGenerators(), RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size, /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) - if (ForceSynchronousTargetRegions || !AsyncInfo || + #ifdef OMPT_SUPPORT - ompt::CallbacksInitialized || + if (ForceSynchronousTargetRegions || !AsyncInfo || + ompt::CallbacksInitialized) { +#else + if (ForceSynchronousTargetRegions || !AsyncInfo) { #endif - !RTL->data_exchange_async || !RTL->synchronize) { assert(RTL->data_exchange && "RTL->data_exchange is nullptr"); return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size); @@ -251,9 +251,6 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, } int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { - if (!RTL->data_notify_mapped) - return OFFLOAD_SUCCESS; - DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n", DPxPTR(HstPtr), Size); @@ -265,9 +262,6 @@ int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) { } int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { - if (!RTL->data_notify_unmapped) - return OFFLOAD_SUCCESS; - DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr)); if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) { @@ -294,70 +288,46 @@ int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, // Run region on device bool DeviceTy::printDeviceInfo() { - if (!RTL->print_device_info) - return false; RTL->print_device_info(RTLDeviceID); return true; } // Whether data can be copied to DstDevice directly bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) { - if (RTL != DstDevice.RTL || !RTL->is_data_exchangable) + if (RTL != DstDevice.RTL) return false; if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID)) - return (RTL->data_exchange != nullptr) || - (RTL->data_exchange_async != nullptr); - + return true; return false; } int32_t 
DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) { - if (RTL->synchronize) - return RTL->synchronize(RTLDeviceID, AsyncInfo); - return OFFLOAD_SUCCESS; + return RTL->synchronize(RTLDeviceID, AsyncInfo); } int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) { - if (RTL->query_async) - return RTL->query_async(RTLDeviceID, AsyncInfo); - - return synchronize(AsyncInfo); + return RTL->query_async(RTLDeviceID, AsyncInfo); } int32_t DeviceTy::createEvent(void **Event) { - if (RTL->create_event) - return RTL->create_event(RTLDeviceID, Event); - - return OFFLOAD_SUCCESS; + return RTL->create_event(RTLDeviceID, Event); } int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) { - if (RTL->record_event) - return RTL->record_event(RTLDeviceID, Event, AsyncInfo); - - return OFFLOAD_SUCCESS; + return RTL->record_event(RTLDeviceID, Event, AsyncInfo); } int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) { - if (RTL->wait_event) - return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); - - return OFFLOAD_SUCCESS; + return RTL->wait_event(RTLDeviceID, Event, AsyncInfo); } int32_t DeviceTy::syncEvent(void *Event) { - if (RTL->sync_event) - return RTL->sync_event(RTLDeviceID, Event); - - return OFFLOAD_SUCCESS; + return RTL->sync_event(RTLDeviceID, Event); } int32_t DeviceTy::destroyEvent(void *Event) { - if (RTL->create_event) - return RTL->destroy_event(RTLDeviceID, Event); - - return OFFLOAD_SUCCESS; + return RTL->destroy_event(RTLDeviceID, Event); } void DeviceTy::dumpOffloadEntries() { diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 0d67661e6485ac..7fcbe7dcff2375 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -482,10 +482,8 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { assert(PM && "Runtime not initialized"); std::atomic &InfoLevel = getInfoLevelInternal(); InfoLevel.store(NewInfoLevel); - for (auto &R : PM->pluginAdaptors()) { - if (R.set_info_flag) - R.set_info_flag(NewInfoLevel); - } + for (auto &R : PM->pluginAdaptors()) + R.set_info_flag(NewInfoLevel); } EXTERN int __tgt_print_device_info(int64_t DeviceId) { diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 514b1d61334481..0c85bf7897b2b6 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -463,12 +463,10 @@ void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum, FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); int32_t Err = 0; - if (!DeviceOrErr->RTL->data_lock) { - Err = DeviceOrErr->RTL->data_lock(DeviceNum, HostPtr, Size, &RC); - if (Err) { - DP("Could not lock ptr %p\n", HostPtr); - return nullptr; - } + Err = DeviceOrErr->RTL->data_lock(DeviceNum, HostPtr, Size, &RC); + if (Err) { + DP("Could not lock ptr %p\n", HostPtr); + return nullptr; } DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(RC)); return RC; @@ -481,9 +479,7 @@ void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) { if (!DeviceOrErr) FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); - if (!DeviceOrErr->RTL->data_unlock) - DeviceOrErr->RTL->data_unlock(DeviceNum, HostPtr); - + DeviceOrErr->RTL->data_unlock(DeviceNum, HostPtr); DP("%s returns\n", Name); } diff --git a/revert_patches.txt b/revert_patches.txt index 70553bd34c974f..983d470c44336a 100644 --- a/revert_patches.txt +++ b/revert_patches.txt @@ -12,8 +12,3 @@ Revert :breaks hip catch tests. 
 Revert: breaks sles build
 36146d2b6be [ELF] Make LinkerDrive::link a template. NFC
 
-Revert: breaks openmp aomp build
-dcbddc252501 [Libomptarget] Unify and simplify plugin CMake (#86191)
-85af772f3b40 [Libomptarget][FIX] Fix unintentinally used PUBLIC interface
-3f5e649ff64a [Libomptarget] Fix linking to LLVM dylib (#86397
-9f0321ccf118 [Libomptarget] Make plugins depend explicitly
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index eb0afbb6dd6ffe..acdf9349fd5868 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -68,6 +68,7 @@ libc_support_library(
     name = "llvm_libc_macros_math_macros",
     hdrs = ["include/llvm-libc-macros/math-macros.h"],
     deps = [":llvm_libc_macros_limits_macros"],
+    defines = ["__FP_LOGBNAN_MIN"],
 )
 
 libc_support_library(