diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 73ef2226d51dca..bff3e442c6a7e5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -197,6 +197,9 @@ Improvements to Clang's diagnostics - Clang now diagnoses declarative nested name specifiers that name alias templates. +- Clang now diagnoses lambda function expressions being implicitly cast to boolean values, under ``-Wpointer-bool-conversion``. + Fixes `#82512 `_. + Improvements to Clang's time-trace ---------------------------------- @@ -295,6 +298,11 @@ Bug Fixes to C++ Support lookup searches the bases of an incomplete class. - Fix a crash when an unresolved overload set is encountered on the RHS of a ``.*`` operator. (`#53815 `_) +- In ``__restrict``-qualified member functions, attach ``__restrict`` to the pointer type of + ``this`` rather than the pointee type. + Fixes (`#82941 `_), + (`#42411 `_), and + (`#18121 `_). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 2fbc56d49a59a1..2c83dca248fb7d 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4536,6 +4536,18 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLFrac : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_frac"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + +def HLSLLerp : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_lerp"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f54b9732d65bef..b408796c76578f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -4127,8 +4127,8 @@ def ext_ms_impcast_fn_obj : ExtWarn< "Microsoft extension">, InGroup; def warn_impcast_pointer_to_bool : Warning< - "address of%select{| function| array}0 '%1' will always evaluate to " - "'true'">, + "address of %select{'%1'|function '%1'|array '%1'|lambda function pointer " + "conversion operator}0 will always evaluate to 'true'">, InGroup; def warn_cast_nonnull_to_bool : Warning< "nonnull %select{function call|parameter}0 '%1' will evaluate to " @@ -10264,6 +10264,11 @@ def err_block_on_vm : Error< def err_sizeless_nonlocal : Error< "non-local variable with sizeless type %0">; +def err_vec_builtin_non_vector_all : Error< + "all arguments to %0 must be vectors">; +def err_vec_builtin_incompatible_vector_all : Error< + "all arguments to %0 must have vectors of the same type">; + def err_vec_builtin_non_vector : Error< "first two arguments to %0 must be vectors">; def err_vec_builtin_incompatible_vector : Error< @@ -12212,4 +12217,8 @@ def err_acc_construct_appertainment def err_acc_branch_in_out_compute_construct : Error<"invalid %select{branch|return}0 %select{out of|into}1 OpenACC " "Compute Construct">; +def note_acc_branch_into_compute_construct + : Note<"invalid branch into OpenACC Compute Construct">; +def note_acc_branch_out_of_compute_construct + : Note<"invalid branch out of OpenACC Compute Construct">; } // end of sema component. 
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 48e9cec482755c..b94d13609c3dd2 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1369,13 +1369,35 @@ class TargetInfo : public TransferrableTargetInfo, } struct BranchProtectionInfo { - LangOptions::SignReturnAddressScopeKind SignReturnAddr = - LangOptions::SignReturnAddressScopeKind::None; - LangOptions::SignReturnAddressKeyKind SignKey = - LangOptions::SignReturnAddressKeyKind::AKey; - bool BranchTargetEnforcement = false; - bool BranchProtectionPAuthLR = false; - bool GuardedControlStack = false; + LangOptions::SignReturnAddressScopeKind SignReturnAddr; + LangOptions::SignReturnAddressKeyKind SignKey; + bool BranchTargetEnforcement; + bool BranchProtectionPAuthLR; + bool GuardedControlStack; + + BranchProtectionInfo() = default; + + const char *getSignReturnAddrStr() const { + switch (SignReturnAddr) { + case LangOptions::SignReturnAddressScopeKind::None: + return "none"; + case LangOptions::SignReturnAddressScopeKind::NonLeaf: + return "non-leaf"; + case LangOptions::SignReturnAddressScopeKind::All: + return "all"; + } + assert(false && "Unexpected SignReturnAddressScopeKind"); + } + + const char *getSignKeyStr() const { + switch (SignKey) { + case LangOptions::SignReturnAddressKeyKind::AKey: + return "a_key"; + case LangOptions::SignReturnAddressKeyKind::BKey: + return "b_key"; + } + assert(false && "Unexpected SignReturnAddressKeyKind"); + } }; /// Determine if the Architecture in this TargetInfo supports branch diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 117e802dae2d9d..b4f2327d9c560a 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2543,8 +2543,19 @@ QualType CXXMethodDecl::getThisType(const FunctionProtoType *FPT, const CXXRecordDecl *Decl) { ASTContext &C = Decl->getASTContext(); QualType ObjectTy = ::getThisObjectType(C, FPT, Decl); - return 
C.getLangOpts().HLSL ? C.getLValueReferenceType(ObjectTy) - : C.getPointerType(ObjectTy); + + // Unlike 'const' and 'volatile', a '__restrict' qualifier must be + // attached to the pointer type, not the pointee. + bool Restrict = FPT->getMethodQuals().hasRestrict(); + if (Restrict) + ObjectTy.removeLocalRestrict(); + + ObjectTy = C.getLangOpts().HLSL ? C.getLValueReferenceType(ObjectTy) + : C.getPointerType(ObjectTy); + + if (Restrict) + ObjectTy.addRestrict(); + return ObjectTy; } QualType CXXMethodDecl::getThisType() const { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 141411d210a15b..c8a8f01fae1d89 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18015,6 +18015,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType*/ T0->getScalarType(), Intrinsic::dx_dot, ArrayRef{Op0, Op1}, nullptr, "dx.dot"); } break; + case Builtin::BI__builtin_hlsl_lerp: { + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *S = EmitScalarExpr(E->getArg(2)); + llvm::Type *Xty = X->getType(); + llvm::Type *Yty = Y->getType(); + llvm::Type *Sty = S->getType(); + if (!Xty->isVectorTy() && !Yty->isVectorTy() && !Sty->isVectorTy()) { + if (Xty->isFloatingPointTy()) { + auto V = Builder.CreateFSub(Y, X); + V = Builder.CreateFMul(S, V); + return Builder.CreateFAdd(X, V, "dx.lerp"); + } + llvm_unreachable("Scalar Lerp is only supported on floats."); + } + // A VectorSplat should have happened + assert(Xty->isVectorTy() && Yty->isVectorTy() && Sty->isVectorTy() && + "Lerp of vector and scalar is not supported."); + + [[maybe_unused]] auto *XVecTy = + E->getArg(0)->getType()->getAs(); + [[maybe_unused]] auto *YVecTy = + E->getArg(1)->getType()->getAs(); + [[maybe_unused]] auto *SVecTy = + E->getArg(2)->getType()->getAs(); + // A HLSLVectorTruncation should have happend + assert(XVecTy->getNumElements() == YVecTy->getNumElements() && + 
XVecTy->getNumElements() == SVecTy->getNumElements() && + "Lerp requires vectors to be of the same size."); + assert(XVecTy->getElementType()->isRealFloatingType() && + XVecTy->getElementType() == YVecTy->getElementType() && + XVecTy->getElementType() == SVecTy->getElementType() && + "Lerp requires float vectors to be of the same type."); + return Builder.CreateIntrinsic( + /*ReturnType*/ Xty, Intrinsic::dx_lerp, ArrayRef{X, Y, S}, + nullptr, "dx.lerp"); + } + case Builtin::BI__builtin_hlsl_elementwise_frac: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("frac operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType*/ Op0->getType(), Intrinsic::dx_frac, + ArrayRef{Op0}, nullptr, "dx.frac"); + } } return nullptr; } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 87cd95945669f2..29493a52a55baa 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7386,18 +7386,28 @@ void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { Device, NumDependences, DependenceList, Data.HasNowaitClause); } - } else if (const auto *C = S.getSingleClause()) { - llvm::Value *InteropvarPtr = - EmitLValue(C->getInteropVar()).getPointer(*this); - OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, - NumDependences, DependenceList, - Data.HasNowaitClause); - } else if (const auto *C = S.getSingleClause()) { - llvm::Value *InteropvarPtr = - EmitLValue(C->getInteropVar()).getPointer(*this); - OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, - NumDependences, DependenceList, - Data.HasNowaitClause); + } + auto ItOMPDestroyClause = S.getClausesOfKind(); + if (!ItOMPDestroyClause.empty()) { + // Look at the multiple destroy clauses + for (const OMPDestroyClause *C : ItOMPDestroyClause) { + llvm::Value *InteropvarPtr = + 
EmitLValue(C->getInteropVar()).getPointer(*this); + OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, + NumDependences, DependenceList, + Data.HasNowaitClause); + } + } + auto ItOMPUseClause = S.getClausesOfKind(); + if (!ItOMPUseClause.empty()) { + // Look at the multiple use clauses + for (const OMPUseClause *C : ItOMPUseClause) { + llvm::Value *InteropvarPtr = + EmitLValue(C->getInteropVar()).getPointer(*this); + OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, + NumDependences, DependenceList, + Data.HasNowaitClause); + } } } diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index adfdd516351901..2b8e2aeb4265f3 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -132,8 +132,7 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { assert(Error.empty()); auto *Fn = cast(GV); - static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; - Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast(BPI.SignReturnAddr)]); + Fn->addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { Fn->addFnAttr("sign-return-address-key", diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index d7d175ff1724f7..5d42e6286e525b 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -152,13 +152,7 @@ class ARMTargetCodeGenInfo : public TargetCodeGenInfo { diag::warn_target_unsupported_branch_protection_attribute) << Arch; } else { - static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; - assert(static_cast(BPI.SignReturnAddr) <= 2 && - "Unexpected SignReturnAddressScopeKind"); - Fn->addFnAttr( - "sign-return-address", - SignReturnAddrStr[static_cast(BPI.SignReturnAddr)]); - + Fn->addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); Fn->addFnAttr("branch-target-enforcement", 
BPI.BranchTargetEnforcement ? "true" : "false"); } diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index b79934e2ab96d2..3328e48cba147f 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -237,11 +237,18 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) ExtraOpts.push_back("relro"); } - // Android ARM/AArch64 use max-page-size=4096 to reduce VMA usage. Note, lld - // from 11 onwards default max-page-size to 65536 for both ARM and AArch64. - if ((Triple.isARM() || Triple.isAArch64()) && Triple.isAndroid()) { - ExtraOpts.push_back("-z"); - ExtraOpts.push_back("max-page-size=4096"); + // Note, lld from 11 onwards default max-page-size to 65536 for both ARM and + // AArch64. + if (Triple.isAndroid()) { + if (Triple.isARM()) { + // Android ARM uses max-page-size=4096 to reduce VMA usage. + ExtraOpts.push_back("-z"); + ExtraOpts.push_back("max-page-size=4096"); + } else if (Triple.isAArch64()) { + // Android AArch64 uses max-page-size=16384 to support 4k/16k page sizes. + ExtraOpts.push_back("-z"); + ExtraOpts.push_back("max-page-size=16384"); + } } if (GCCInstallation.getParentLibPath().contains("opt/rh/")) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 08e5d981a4a4ca..0aa8651ba80dc4 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -317,6 +317,74 @@ double3 floor(double3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor) double4 floor(double4); +//===----------------------------------------------------------------------===// +// frac builtins +//===----------------------------------------------------------------------===// + +/// \fn T frac(T x) +/// \brief Returns the fractional (or decimal) part of x. \a x parameter. +/// \param x The specified input value. +/// +/// If \a the return value is greater than or equal to 0 and less than 1. 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half frac(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half2 frac(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half3 frac(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +half4 frac(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float frac(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float2 frac(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float3 frac(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac) +float4 frac(float4); + +//===----------------------------------------------------------------------===// +// lerp builtins +//===----------------------------------------------------------------------===// + +/// \fn T lerp(T x, T y, T s) +/// \brief Returns the linear interpolation of x to y by s. +/// \param x [in] The first-floating point value. +/// \param y [in] The second-floating point value. +/// \param s [in] A value that linearly interpolates between the x parameter and +/// the y parameter. +/// +/// Linear interpolation is based on the following formula: x*(1-s) + y*s which +/// can equivalently be written as x + s(y-x). 
+ +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half lerp(half, half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half2 lerp(half2, half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half3 lerp(half3, half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +half4 lerp(half4, half4, half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float lerp(float, float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float2 lerp(float2, float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float3 lerp(float3, float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) +float4 lerp(float4, float4, float4); + //===----------------------------------------------------------------------===// // log builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp index ec3892e92f3c3b..6722878883be8e 100644 --- a/clang/lib/Sema/JumpDiagnostics.cpp +++ b/clang/lib/Sema/JumpDiagnostics.cpp @@ -604,6 +604,16 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S, break; } + case Stmt::OpenACCComputeConstructClass: { + unsigned NewParentScope = Scopes.size(); + OpenACCComputeConstruct *CC = cast(S); + Scopes.push_back(GotoScope( + ParentScope, diag::note_acc_branch_into_compute_construct, + diag::note_acc_branch_out_of_compute_construct, CC->getBeginLoc())); + BuildScopeInformation(CC->getStructuredBlock(), NewParentScope); + return; + } + default: if (auto *ED = dyn_cast(S)) { if (!ED->isStandaloneDirective()) { @@ -936,11 +946,16 @@ void JumpScopeChecker::CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc, if (Scopes[I].InDiag == diag::note_protected_by_seh_finally) { S.Diag(From->getBeginLoc(), diag::warn_jump_out_of_seh_finally); break; - } - if (Scopes[I].InDiag == 
diag::note_omp_protected_structured_block) { + } else if (Scopes[I].InDiag == + diag::note_omp_protected_structured_block) { S.Diag(From->getBeginLoc(), diag::err_goto_into_protected_scope); S.Diag(To->getBeginLoc(), diag::note_omp_exits_structured_block); break; + } else if (Scopes[I].InDiag == + diag::note_acc_branch_into_compute_construct) { + S.Diag(From->getBeginLoc(), diag::err_goto_into_protected_scope); + S.Diag(Scopes[I].Loc, diag::note_acc_branch_out_of_compute_construct); + return; } } } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 739e5ad872e235..716f3237a22fb1 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5197,49 +5197,72 @@ bool Sema::CheckPPCMMAType(QualType Type, SourceLocation TypeLoc) { bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { assert(TheCall->getNumArgs() > 1); ExprResult A = TheCall->getArg(0); - ExprResult B = TheCall->getArg(1); + QualType ArgTyA = A.get()->getType(); - QualType ArgTyB = B.get()->getType(); + auto *VecTyA = ArgTyA->getAs(); - auto *VecTyB = ArgTyB->getAs(); SourceLocation BuiltinLoc = TheCall->getBeginLoc(); - if (VecTyA == nullptr && VecTyB == nullptr) - return false; - if (VecTyA && VecTyB) { - bool retValue = false; - if (VecTyA->getElementType() != VecTyB->getElementType()) { - // Note: type promotion is intended to be handeled via the intrinsics - // and not the builtin itself. - S->Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_incompatible_vector) - << TheCall->getDirectCallee() - << SourceRange(A.get()->getBeginLoc(), B.get()->getEndLoc()); - retValue = true; - } - if (VecTyA->getNumElements() != VecTyB->getNumElements()) { - // if we get here a HLSLVectorTruncation is needed. 
- S->Diag(BuiltinLoc, diag::err_vec_builtin_incompatible_vector) - << TheCall->getDirectCallee() - << SourceRange(TheCall->getArg(0)->getBeginLoc(), - TheCall->getArg(1)->getEndLoc()); - retValue = true; - } + for (unsigned i = 1; i < TheCall->getNumArgs(); ++i) { + ExprResult B = TheCall->getArg(i); + QualType ArgTyB = B.get()->getType(); + auto *VecTyB = ArgTyB->getAs(); + if (VecTyA == nullptr && VecTyB == nullptr) + return false; - if (retValue) - TheCall->setType(VecTyA->getElementType()); + if (VecTyA && VecTyB) { + bool retValue = false; + if (VecTyA->getElementType() != VecTyB->getElementType()) { + // Note: type promotion is intended to be handeled via the intrinsics + // and not the builtin itself. + S->Diag(TheCall->getBeginLoc(), + diag::err_vec_builtin_incompatible_vector_all) + << TheCall->getDirectCallee() + << SourceRange(A.get()->getBeginLoc(), B.get()->getEndLoc()); + retValue = true; + } + if (VecTyA->getNumElements() != VecTyB->getNumElements()) { + // if we get here a HLSLVectorTruncation is needed. 
+ S->Diag(BuiltinLoc, diag::err_vec_builtin_incompatible_vector_all) + << TheCall->getDirectCallee() + << SourceRange(TheCall->getArg(0)->getBeginLoc(), + TheCall->getArg(1)->getEndLoc()); + retValue = true; + } + + if (!retValue) + TheCall->setType(VecTyA->getElementType()); - return retValue; + return retValue; + } } // Note: if we get here one of the args is a scalar which // requires a VectorSplat on Arg0 or Arg1 - S->Diag(BuiltinLoc, diag::err_vec_builtin_non_vector) + S->Diag(BuiltinLoc, diag::err_vec_builtin_non_vector_all) << TheCall->getDirectCallee() << SourceRange(TheCall->getArg(0)->getBeginLoc(), TheCall->getArg(1)->getEndLoc()); return true; } +bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { + QualType ExpectedType = S->Context.FloatTy; + for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) { + QualType PassedType = TheCall->getArg(i)->getType(); + if (!PassedType->hasFloatingRepresentation()) { + if (auto *VecTyA = PassedType->getAs()) + ExpectedType = S->Context.getVectorType( + ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); + S->Diag(TheCall->getArg(0)->getBeginLoc(), + diag::err_typecheck_convert_incompatible) + << PassedType << ExpectedType << 1 << 0 << 0; + return true; + } + } + return false; +} + // Note: returning true in this case results in CheckBuiltinFunctionCall // returning an ExprError bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { @@ -5253,6 +5276,24 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_frac: { + if (PrepareBuiltinElementwiseMathOneArgCall(TheCall)) + return true; + if (CheckAllArgsHaveFloatRepresentation(this, TheCall)) + return true; + break; + } + case Builtin::BI__builtin_hlsl_lerp: { + if (checkArgCount(*this, TheCall, 3)) + return true; + if (CheckVectorElementCallArgs(this, TheCall)) + return true; + if 
(SemaBuiltinElementwiseTernaryMath(TheCall)) + return true; + if (CheckAllArgsHaveFloatRepresentation(this, TheCall)) + return true; + break; + } } return false; } @@ -16543,6 +16584,17 @@ void Sema::DiagnoseAlwaysNonNullPointer(Expr *E, } } + // Complain if we are converting a lambda expression to a boolean value + if (const auto *MCallExpr = dyn_cast(E)) { + if (const auto *MRecordDecl = MCallExpr->getRecordDecl(); + MRecordDecl && MRecordDecl->isLambda()) { + Diag(E->getExprLoc(), diag::warn_impcast_pointer_to_bool) + << /*LambdaPointerConversionOperatorType=*/3 + << MRecordDecl->getSourceRange() << Range << IsEqual; + return; + } + } + // Expect to find a single Decl. Skip anything more complicated. ValueDecl *D = nullptr; if (DeclRefExpr *R = dyn_cast(E)) { diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 59758d3bd6d1a3..c4750ce78fa9c1 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1220,7 +1220,7 @@ static QualType adjustCVQualifiersForCXXThisWithinLambda( : nullptr; } } - return ASTCtx.getPointerType(ClassType); + return ThisTy; } QualType Sema::getCurrentThisType() { diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 73af8ae6925663..bd450ca8281260 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -567,6 +567,11 @@ Sema::ActOnLabelStmt(SourceLocation IdentLoc, LabelDecl *TheDecl, Diag(IdentLoc, diag::warn_reserved_extern_symbol) << TheDecl << static_cast(Status); + // If this label is in a compute construct scope, we need to make sure we + // check gotos in/out. + if (getCurScope()->isInOpenACCComputeConstructScope()) + setFunctionHasBranchProtectedScope(); + // Otherwise, things are good. Fill in the declaration and return it. 
LabelStmt *LS = new (Context) LabelStmt(IdentLoc, TheDecl, SubStmt); TheDecl->setStmt(LS); @@ -3307,6 +3312,12 @@ StmtResult Sema::ActOnGotoStmt(SourceLocation GotoLoc, SourceLocation LabelLoc, LabelDecl *TheDecl) { setFunctionHasBranchIntoScope(); + + // If this goto is in a compute construct scope, we need to make sure we check + // gotos in/out. + if (getCurScope()->isInOpenACCComputeConstructScope()) + setFunctionHasBranchProtectedScope(); + TheDecl->markUsed(Context); return new (Context) GotoStmt(TheDecl, GotoLoc, LabelLoc); } @@ -3335,6 +3346,11 @@ Sema::ActOnIndirectGotoStmt(SourceLocation GotoLoc, SourceLocation StarLoc, setFunctionHasIndirectGoto(); + // If this goto is in a compute construct scope, we need to make sure we + // check gotos in/out. + if (getCurScope()->isInOpenACCComputeConstructScope()) + setFunctionHasBranchProtectedScope(); + return new (Context) IndirectGotoStmt(GotoLoc, StarLoc, E); } diff --git a/clang/test/CXX/drs/dr18xx.cpp b/clang/test/CXX/drs/dr18xx.cpp index a7cee4ef8902f9..e78730e8992cf8 100644 --- a/clang/test/CXX/drs/dr18xx.cpp +++ b/clang/test/CXX/drs/dr18xx.cpp @@ -282,6 +282,7 @@ namespace dr1837 { // dr1837: 3.3 struct A { int f(); bool b = [] { + // since-cxx11-warning@-1 {{address of lambda function pointer conversion operator will always evaluate to 'true'}} struct Local { static_assert(sizeof(this->f()) == sizeof(int), ""); }; diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm index cb56f6816ad036..e93c37f3b9ae12 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm +++ b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm @@ -65,10 +65,10 @@ void nesting() { namespace overloading { void bool_conversion() { - if ([](){}) { + if ([](){}) { // expected-warning{{address of lambda function pointer conversion operator will always evaluate to 'true'}} } - bool b = []{}; + bool b = []{}; // expected-warning{{address of lambda 
function pointer conversion operator will always evaluate to 'true'}} b = (bool)[]{}; } @@ -108,8 +108,9 @@ void call_with_lambda() { using decltype(a)::operator id; // expected-note {{here}} } extern d; - bool r1 = c; - bool r2 = d; // expected-error {{private}} + bool r1 = c; // expected-warning{{address of lambda function pointer conversion operator will always evaluate to 'true'}} + bool r2 = d; // expected-error {{private}} \ + expected-warning{{address of lambda function pointer conversion operator will always evaluate to 'true'}} } namespace PR13117 { diff --git a/clang/test/CodeGenHLSL/builtins/abs.hlsl b/clang/test/CodeGenHLSL/builtins/abs.hlsl index 54c9d1a9dded45..ad65cab2721a2b 100644 --- a/clang/test/CodeGenHLSL/builtins/abs.hlsl +++ b/clang/test/CodeGenHLSL/builtins/abs.hlsl @@ -1,141 +1,93 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF using hlsl::abs; #ifdef __HLSL_ENABLE_16_BIT -// CHECK: define noundef i16 @ -// CHECK: call i16 @llvm.abs.i16( -int16_t test_abs_int16_t ( int16_t p0 ) { - return abs ( p0 ); -} -// CHECK: define noundef <2 x i16> @ -// CHECK: call <2 x i16> @llvm.abs.v2i16( -int16_t2 test_abs_int16_t2 ( int16_t2 p0 ) { - return abs ( p0 ); -} -// CHECK: define noundef <3 x i16> @ -// CHECK: call <3 x i16> @llvm.abs.v3i16( -int16_t3 test_abs_int16_t3 ( int16_t3 p0 ) { - return abs ( p0 ); -} -// CHECK: define noundef <4 x i16> @ -// CHECK: call <4 x i16> @llvm.abs.v4i16( -int16_t4 test_abs_int16_t4 ( int16_t4 p0 ) 
{ - return abs ( p0 ); -} +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 @llvm.abs.i16( +int16_t test_abs_int16_t(int16_t p0) { return abs(p0); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.abs.v2i16( +int16_t2 test_abs_int16_t2(int16_t2 p0) { return abs(p0); } +// NATIVE_HALF: define noundef <3 x i16> @ +// NATIVE_HALF: call <3 x i16> @llvm.abs.v3i16( +int16_t3 test_abs_int16_t3(int16_t3 p0) { return abs(p0); } +// NATIVE_HALF: define noundef <4 x i16> @ +// NATIVE_HALF: call <4 x i16> @llvm.abs.v4i16( +int16_t4 test_abs_int16_t4(int16_t4 p0) { return abs(p0); } #endif // __HLSL_ENABLE_16_BIT -// CHECK: define noundef half @ -// CHECK: call half @llvm.fabs.f16( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.fabs.f16( // NO_HALF: define noundef float @"?test_abs_half@@YA$halff@$halff@@Z"( // NO_HALF: call float @llvm.fabs.f32(float %0) -half test_abs_half ( half p0 ) { - return abs ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.fabs.v2f16( +half test_abs_half(half p0) { return abs(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.fabs.v2f16( // NO_HALF: define noundef <2 x float> @"?test_abs_half2@@YAT?$__vector@$halff@$01@__clang@@T12@@Z"( // NO_HALF: call <2 x float> @llvm.fabs.v2f32( -half2 test_abs_half2 ( half2 p0 ) { - return abs ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.fabs.v3f16( +half2 test_abs_half2(half2 p0) { return abs(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.fabs.v3f16( // NO_HALF: define noundef <3 x float> @"?test_abs_half3@@YAT?$__vector@$halff@$02@__clang@@T12@@Z"( // NO_HALF: call <3 x float> @llvm.fabs.v3f32( -half3 test_abs_half3 ( half3 p0 ) { - return abs ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.fabs.v4f16( +half3 test_abs_half3(half3 p0) { return abs(p0); 
} +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.fabs.v4f16( // NO_HALF: define noundef <4 x float> @"?test_abs_half4@@YAT?$__vector@$halff@$03@__clang@@T12@@Z"( // NO_HALF: call <4 x float> @llvm.fabs.v4f32( -half4 test_abs_half4 ( half4 p0 ) { - return abs ( p0 ); -} +half4 test_abs_half4(half4 p0) { return abs(p0); } // CHECK: define noundef i32 @ // CHECK: call i32 @llvm.abs.i32( -// NO_HALF: define noundef i32 @"?test_abs_int@@YAHH@Z" -int test_abs_int ( int p0 ) { - return abs ( p0 ); -} +int test_abs_int(int p0) { return abs(p0); } // CHECK: define noundef <2 x i32> @ // CHECK: call <2 x i32> @llvm.abs.v2i32( -int2 test_abs_int2 ( int2 p0 ) { - return abs ( p0 ); -} +int2 test_abs_int2(int2 p0) { return abs(p0); } // CHECK: define noundef <3 x i32> @ // CHECK: call <3 x i32> @llvm.abs.v3i32( -int3 test_abs_int3 ( int3 p0 ) { - return abs ( p0 ); -} +int3 test_abs_int3(int3 p0) { return abs(p0); } // CHECK: define noundef <4 x i32> @ // CHECK: call <4 x i32> @llvm.abs.v4i32( -int4 test_abs_int4 ( int4 p0 ) { - return abs ( p0 ); -} +int4 test_abs_int4(int4 p0) { return abs(p0); } // CHECK: define noundef float @ // CHECK: call float @llvm.fabs.f32( -float test_abs_float ( float p0 ) { - return abs ( p0 ); -} +float test_abs_float(float p0) { return abs(p0); } // CHECK: define noundef <2 x float> @ // CHECK: call <2 x float> @llvm.fabs.v2f32( -float2 test_abs_float2 ( float2 p0 ) { - return abs ( p0 ); -} +float2 test_abs_float2(float2 p0) { return abs(p0); } // CHECK: define noundef <3 x float> @ // CHECK: call <3 x float> @llvm.fabs.v3f32( -float3 test_abs_float3 ( float3 p0 ) { - return abs ( p0 ); -} +float3 test_abs_float3(float3 p0) { return abs(p0); } // CHECK: define noundef <4 x float> @ // CHECK: call <4 x float> @llvm.fabs.v4f32( -float4 test_abs_float4 ( float4 p0 ) { - return abs ( p0 ); -} +float4 test_abs_float4(float4 p0) { return abs(p0); } // CHECK: define noundef i64 @ // CHECK: call i64 @llvm.abs.i64( 
-int64_t test_abs_int64_t ( int64_t p0 ) { - return abs ( p0 ); -} +int64_t test_abs_int64_t(int64_t p0) { return abs(p0); } // CHECK: define noundef <2 x i64> @ // CHECK: call <2 x i64> @llvm.abs.v2i64( -int64_t2 test_abs_int64_t2 ( int64_t2 p0 ) { - return abs ( p0 ); -} +int64_t2 test_abs_int64_t2(int64_t2 p0) { return abs(p0); } // CHECK: define noundef <3 x i64> @ // CHECK: call <3 x i64> @llvm.abs.v3i64( -int64_t3 test_abs_int64_t3 ( int64_t3 p0 ) { - return abs ( p0 ); -} +int64_t3 test_abs_int64_t3(int64_t3 p0) { return abs(p0); } // CHECK: define noundef <4 x i64> @ // CHECK: call <4 x i64> @llvm.abs.v4i64( -int64_t4 test_abs_int64_t4 ( int64_t4 p0 ) { - return abs ( p0 ); -} +int64_t4 test_abs_int64_t4(int64_t4 p0) { return abs(p0); } // CHECK: define noundef double @ // CHECK: call double @llvm.fabs.f64( -double test_abs_double ( double p0 ) { - return abs ( p0 ); -} +double test_abs_double(double p0) { return abs(p0); } // CHECK: define noundef <2 x double> @ // CHECK: call <2 x double> @llvm.fabs.v2f64( -double2 test_abs_double2 ( double2 p0 ) { - return abs ( p0 ); -} +double2 test_abs_double2(double2 p0) { return abs(p0); } // CHECK: define noundef <3 x double> @ // CHECK: call <3 x double> @llvm.fabs.v3f64( -double3 test_abs_double3 ( double3 p0 ) { - return abs ( p0 ); -} +double3 test_abs_double3(double3 p0) { return abs(p0); } // CHECK: define noundef <4 x double> @ // CHECK: call <4 x double> @llvm.fabs.v4f64( -double4 test_abs_double4 ( double4 p0 ) { - return abs ( p0 ); -} +double4 test_abs_double4(double4 p0) { return abs(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/ceil.hlsl b/clang/test/CodeGenHLSL/builtins/ceil.hlsl index f1672816e72bc2..06d0d4c2cf546d 100644 --- a/clang/test/CodeGenHLSL/builtins/ceil.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ceil.hlsl @@ -1,79 +1,56 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm 
-disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF using hlsl::ceil; -// CHECK: define noundef half @ -// CHECK: call half @llvm.ceil.f16( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.ceil.f16( // NO_HALF: define noundef float @"?test_ceil_half@@YA$halff@$halff@@Z"( // NO_HALF: call float @llvm.ceil.f32(float %0) -half test_ceil_half ( half p0 ) { - return ceil ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.ceil.v2f16( +half test_ceil_half(half p0) { return ceil(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.ceil.v2f16( // NO_HALF: define noundef <2 x float> @"?test_ceil_half2@@YAT?$__vector@$halff@$01@__clang@@T12@@Z"( // NO_HALF: call <2 x float> @llvm.ceil.v2f32( -half2 test_ceil_half2 ( half2 p0 ) { - return ceil ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.ceil.v3f16( +half2 test_ceil_half2(half2 p0) { return ceil(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.ceil.v3f16( // NO_HALF: define noundef <3 x float> @"?test_ceil_half3@@YAT?$__vector@$halff@$02@__clang@@T12@@Z"( // NO_HALF: call <3 x float> @llvm.ceil.v3f32( -half3 test_ceil_half3 ( half3 p0 ) { - return ceil ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.ceil.v4f16( +half3 test_ceil_half3(half3 p0) { return ceil(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.ceil.v4f16( // NO_HALF: define noundef <4 x float> @"?test_ceil_half4@@YAT?$__vector@$halff@$03@__clang@@T12@@Z"( // 
NO_HALF: call <4 x float> @llvm.ceil.v4f32( -half4 test_ceil_half4 ( half4 p0 ) { - return ceil ( p0 ); -} +half4 test_ceil_half4(half4 p0) { return ceil(p0); } // CHECK: define noundef float @ // CHECK: call float @llvm.ceil.f32( -float test_ceil_float ( float p0 ) { - return ceil ( p0 ); -} +float test_ceil_float(float p0) { return ceil(p0); } // CHECK: define noundef <2 x float> @ // CHECK: call <2 x float> @llvm.ceil.v2f32( -float2 test_ceil_float2 ( float2 p0 ) { - return ceil ( p0 ); -} +float2 test_ceil_float2(float2 p0) { return ceil(p0); } // CHECK: define noundef <3 x float> @ // CHECK: call <3 x float> @llvm.ceil.v3f32( -float3 test_ceil_float3 ( float3 p0 ) { - return ceil ( p0 ); -} +float3 test_ceil_float3(float3 p0) { return ceil(p0); } // CHECK: define noundef <4 x float> @ // CHECK: call <4 x float> @llvm.ceil.v4f32( -float4 test_ceil_float4 ( float4 p0 ) { - return ceil ( p0 ); -} +float4 test_ceil_float4(float4 p0) { return ceil(p0); } // CHECK: define noundef double @ // CHECK: call double @llvm.ceil.f64( -double test_ceil_double ( double p0 ) { - return ceil ( p0 ); -} +double test_ceil_double(double p0) { return ceil(p0); } // CHECK: define noundef <2 x double> @ // CHECK: call <2 x double> @llvm.ceil.v2f64( -double2 test_ceil_double2 ( double2 p0 ) { - return ceil ( p0 ); -} +double2 test_ceil_double2(double2 p0) { return ceil(p0); } // CHECK: define noundef <3 x double> @ // CHECK: call <3 x double> @llvm.ceil.v3f64( -double3 test_ceil_double3 ( double3 p0 ) { - return ceil ( p0 ); -} +double3 test_ceil_double3(double3 p0) { return ceil(p0); } // CHECK: define noundef <4 x double> @ // CHECK: call <4 x double> @llvm.ceil.v4f64( -double4 test_ceil_double4 ( double4 p0 ) { - return ceil ( p0 ); -} +double4 test_ceil_double4(double4 p0) { return ceil(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/cos.hlsl b/clang/test/CodeGenHLSL/builtins/cos.hlsl index 2fc1571949b2c5..fb416fcaa49d76 100644 --- a/clang/test/CodeGenHLSL/builtins/cos.hlsl 
+++ b/clang/test/CodeGenHLSL/builtins/cos.hlsl @@ -1,56 +1,41 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.cos.f16( -// NO_HALF: define noundef float @"?test_cos_half@@YA$halff@$halff@@Z"( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.cos.f16( +// NO_HALF: define noundef float @"?test_cos_half // NO_HALF: call float @llvm.cos.f32( -half test_cos_half ( half p0 ) { - return cos ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.cos.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_cos_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"( +half test_cos_half(half p0) { return cos(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.cos.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_cos_half2 // NO_HALF: call <2 x float> @llvm.cos.v2f32( -half2 test_cos_half2 ( half2 p0 ) { - return cos ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.cos.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_cos_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"( +half2 test_cos_half2(half2 p0) { return cos(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.cos.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_cos_half3 // NO_HALF: call <3 x float> @llvm.cos.v3f32( -half3 test_cos_half3 ( half3 p0 ) { - return cos ( p0 ); -} -// CHECK: 
define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.cos.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_cos_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"( +half3 test_cos_half3(half3 p0) { return cos(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.cos.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_cos_half4 // NO_HALF: call <4 x float> @llvm.cos.v4f32( -half4 test_cos_half4 ( half4 p0 ) { - return cos ( p0 ); -} +half4 test_cos_half4(half4 p0) { return cos(p0); } -// CHECK: define noundef float @ +// CHECK: define noundef float @"?test_cos_float // CHECK: call float @llvm.cos.f32( -float test_cos_float ( float p0 ) { - return cos ( p0 ); -} -// CHECK: define noundef <2 x float> @ +float test_cos_float(float p0) { return cos(p0); } +// CHECK: define noundef <2 x float> @"?test_cos_float2 // CHECK: call <2 x float> @llvm.cos.v2f32 -float2 test_cos_float2 ( float2 p0 ) { - return cos ( p0 ); -} -// CHECK: define noundef <3 x float> @ +float2 test_cos_float2(float2 p0) { return cos(p0); } +// CHECK: define noundef <3 x float> @"?test_cos_float3 // CHECK: call <3 x float> @llvm.cos.v3f32 -float3 test_cos_float3 ( float3 p0 ) { - return cos ( p0 ); -} -// CHECK: define noundef <4 x float> @ +float3 test_cos_float3(float3 p0) { return cos(p0); } +// CHECK: define noundef <4 x float> @"?test_cos_float4 // CHECK: call <4 x float> @llvm.cos.v4f32 -float4 test_cos_float4 ( float4 p0 ) { - return cos ( p0 ); -} +float4 test_cos_float4(float4 p0) { return cos(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/dot.hlsl b/clang/test/CodeGenHLSL/builtins/dot.hlsl index b2c1bae31d13b1..c064d118caf3e7 100644 --- a/clang/test/CodeGenHLSL/builtins/dot.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot.hlsl @@ -9,230 +9,160 @@ #ifdef __HLSL_ENABLE_16_BIT // NATIVE_HALF: %dx.dot = mul i16 %0, %1 // NATIVE_HALF: ret i16 %dx.dot -int16_t test_dot_short ( int16_t p0, int16_t p1 ) { - return dot ( p0, p1 ); -} +int16_t 
test_dot_short(int16_t p0, int16_t p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call i16 @llvm.dx.dot.v2i16(<2 x i16> %0, <2 x i16> %1) // NATIVE_HALF: ret i16 %dx.dot -int16_t test_dot_short2 ( int16_t2 p0, int16_t2 p1 ) { - return dot ( p0, p1 ); -} +int16_t test_dot_short2(int16_t2 p0, int16_t2 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call i16 @llvm.dx.dot.v3i16(<3 x i16> %0, <3 x i16> %1) // NATIVE_HALF: ret i16 %dx.dot -int16_t test_dot_short3 ( int16_t3 p0, int16_t3 p1 ) { - return dot ( p0, p1 ); -} +int16_t test_dot_short3(int16_t3 p0, int16_t3 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call i16 @llvm.dx.dot.v4i16(<4 x i16> %0, <4 x i16> %1) // NATIVE_HALF: ret i16 %dx.dot -int16_t test_dot_short4 ( int16_t4 p0, int16_t4 p1 ) { - return dot ( p0, p1 ); -} +int16_t test_dot_short4(int16_t4 p0, int16_t4 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = mul i16 %0, %1 // NATIVE_HALF: ret i16 %dx.dot -uint16_t test_dot_ushort ( uint16_t p0, uint16_t p1 ) { - return dot ( p0, p1 ); -} +uint16_t test_dot_ushort(uint16_t p0, uint16_t p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call i16 @llvm.dx.dot.v2i16(<2 x i16> %0, <2 x i16> %1) // NATIVE_HALF: ret i16 %dx.dot -uint16_t test_dot_ushort2 ( uint16_t2 p0, uint16_t2 p1 ) { - return dot ( p0, p1 ); -} +uint16_t test_dot_ushort2(uint16_t2 p0, uint16_t2 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call i16 @llvm.dx.dot.v3i16(<3 x i16> %0, <3 x i16> %1) // NATIVE_HALF: ret i16 %dx.dot -uint16_t test_dot_ushort3 ( uint16_t3 p0, uint16_t3 p1 ) { - return dot ( p0, p1 ); -} +uint16_t test_dot_ushort3(uint16_t3 p0, uint16_t3 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call i16 @llvm.dx.dot.v4i16(<4 x i16> %0, <4 x i16> %1) // NATIVE_HALF: ret i16 %dx.dot -uint16_t test_dot_ushort4 ( uint16_t4 p0, uint16_t4 p1 ) { - return dot ( p0, p1 ); -} +uint16_t test_dot_ushort4(uint16_t4 p0, uint16_t4 p1) { return dot(p0, p1); } #endif // CHECK: %dx.dot = mul i32 %0, 
%1 // CHECK: ret i32 %dx.dot -int test_dot_int ( int p0, int p1 ) { - return dot ( p0, p1 ); -} +int test_dot_int(int p0, int p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i32 @llvm.dx.dot.v2i32(<2 x i32> %0, <2 x i32> %1) // CHECK: ret i32 %dx.dot -int test_dot_int2 ( int2 p0, int2 p1 ) { - return dot ( p0, p1 ); -} +int test_dot_int2(int2 p0, int2 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i32 @llvm.dx.dot.v3i32(<3 x i32> %0, <3 x i32> %1) // CHECK: ret i32 %dx.dot -int test_dot_int3 ( int3 p0, int3 p1 ) { - return dot ( p0, p1 ); -} +int test_dot_int3(int3 p0, int3 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i32 @llvm.dx.dot.v4i32(<4 x i32> %0, <4 x i32> %1) // CHECK: ret i32 %dx.dot -int test_dot_int4 ( int4 p0, int4 p1 ) { - return dot ( p0, p1 ); -} +int test_dot_int4(int4 p0, int4 p1) { return dot(p0, p1); } // CHECK: %dx.dot = mul i32 %0, %1 // CHECK: ret i32 %dx.dot -uint test_dot_uint ( uint p0, uint p1 ) { - return dot ( p0, p1 ); -} +uint test_dot_uint(uint p0, uint p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i32 @llvm.dx.dot.v2i32(<2 x i32> %0, <2 x i32> %1) // CHECK: ret i32 %dx.dot -uint test_dot_uint2 ( uint2 p0, uint2 p1 ) { - return dot ( p0, p1 ); -} +uint test_dot_uint2(uint2 p0, uint2 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i32 @llvm.dx.dot.v3i32(<3 x i32> %0, <3 x i32> %1) // CHECK: ret i32 %dx.dot -uint test_dot_uint3 ( uint3 p0, uint3 p1 ) { - return dot ( p0, p1 ); -} +uint test_dot_uint3(uint3 p0, uint3 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i32 @llvm.dx.dot.v4i32(<4 x i32> %0, <4 x i32> %1) // CHECK: ret i32 %dx.dot -uint test_dot_uint4 ( uint4 p0, uint4 p1 ) { - return dot ( p0, p1 ); -} +uint test_dot_uint4(uint4 p0, uint4 p1) { return dot(p0, p1); } // CHECK: %dx.dot = mul i64 %0, %1 // CHECK: ret i64 %dx.dot -int64_t test_dot_long ( int64_t p0, int64_t p1 ) { - return dot ( p0, p1 ); -} +int64_t test_dot_long(int64_t p0, int64_t p1) { return dot(p0, p1); } // CHECK: %dx.dot 
= call i64 @llvm.dx.dot.v2i64(<2 x i64> %0, <2 x i64> %1) // CHECK: ret i64 %dx.dot -int64_t test_dot_long2 ( int64_t2 p0, int64_t2 p1 ) { - return dot ( p0, p1 ); -} +int64_t test_dot_long2(int64_t2 p0, int64_t2 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i64 @llvm.dx.dot.v3i64(<3 x i64> %0, <3 x i64> %1) // CHECK: ret i64 %dx.dot -int64_t test_dot_long3 ( int64_t3 p0, int64_t3 p1 ) { - return dot ( p0, p1 ); -} +int64_t test_dot_long3(int64_t3 p0, int64_t3 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i64 @llvm.dx.dot.v4i64(<4 x i64> %0, <4 x i64> %1) // CHECK: ret i64 %dx.dot -int64_t test_dot_long4 ( int64_t4 p0, int64_t4 p1 ) { - return dot ( p0, p1 ); -} +int64_t test_dot_long4(int64_t4 p0, int64_t4 p1) { return dot(p0, p1); } // CHECK: %dx.dot = mul i64 %0, %1 // CHECK: ret i64 %dx.dot -uint64_t test_dot_ulong ( uint64_t p0, uint64_t p1 ) { - return dot ( p0, p1 ); -} +uint64_t test_dot_ulong(uint64_t p0, uint64_t p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i64 @llvm.dx.dot.v2i64(<2 x i64> %0, <2 x i64> %1) // CHECK: ret i64 %dx.dot -uint64_t test_dot_ulong2 ( uint64_t2 p0, uint64_t2 p1 ) { - return dot ( p0, p1 ); -} +uint64_t test_dot_ulong2(uint64_t2 p0, uint64_t2 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i64 @llvm.dx.dot.v3i64(<3 x i64> %0, <3 x i64> %1) // CHECK: ret i64 %dx.dot -uint64_t test_dot_ulong3 ( uint64_t3 p0, uint64_t3 p1 ) { - return dot ( p0, p1 ); -} +uint64_t test_dot_ulong3(uint64_t3 p0, uint64_t3 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call i64 @llvm.dx.dot.v4i64(<4 x i64> %0, <4 x i64> %1) // CHECK: ret i64 %dx.dot -uint64_t test_dot_ulong4 ( uint64_t4 p0, uint64_t4 p1 ) { - return dot ( p0, p1 ); -} +uint64_t test_dot_ulong4(uint64_t4 p0, uint64_t4 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = fmul half %0, %1 // NATIVE_HALF: ret half %dx.dot // NO_HALF: %dx.dot = fmul float %0, %1 // NO_HALF: ret float %dx.dot -half test_dot_half ( half p0, half p1 ) { - return dot ( p0, p1 ); -} 
+half test_dot_half(half p0, half p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call half @llvm.dx.dot.v2f16(<2 x half> %0, <2 x half> %1) // NATIVE_HALF: ret half %dx.dot // NO_HALF: %dx.dot = call float @llvm.dx.dot.v2f32(<2 x float> %0, <2 x float> %1) // NO_HALF: ret float %dx.dot -half test_dot_half2 ( half2 p0, half2 p1 ) { - return dot ( p0, p1 ); -} +half test_dot_half2(half2 p0, half2 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call half @llvm.dx.dot.v3f16(<3 x half> %0, <3 x half> %1) // NATIVE_HALF: ret half %dx.dot // NO_HALF: %dx.dot = call float @llvm.dx.dot.v3f32(<3 x float> %0, <3 x float> %1) // NO_HALF: ret float %dx.dot -half test_dot_half3 ( half3 p0, half3 p1 ) { - return dot ( p0, p1 ); -} +half test_dot_half3(half3 p0, half3 p1) { return dot(p0, p1); } // NATIVE_HALF: %dx.dot = call half @llvm.dx.dot.v4f16(<4 x half> %0, <4 x half> %1) // NATIVE_HALF: ret half %dx.dot // NO_HALF: %dx.dot = call float @llvm.dx.dot.v4f32(<4 x float> %0, <4 x float> %1) // NO_HALF: ret float %dx.dot -half test_dot_half4 ( half4 p0, half4 p1 ) { - return dot ( p0, p1 ); -} +half test_dot_half4(half4 p0, half4 p1) { return dot(p0, p1); } // CHECK: %dx.dot = fmul float %0, %1 // CHECK: ret float %dx.dot -float test_dot_float ( float p0, float p1 ) { - return dot ( p0, p1 ); -} +float test_dot_float(float p0, float p1) { return dot(p0, p1); } // CHECK: %dx.dot = call float @llvm.dx.dot.v2f32(<2 x float> %0, <2 x float> %1) // CHECK: ret float %dx.dot -float test_dot_float2 ( float2 p0, float2 p1 ) { - return dot ( p0, p1 ); -} +float test_dot_float2(float2 p0, float2 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call float @llvm.dx.dot.v3f32(<3 x float> %0, <3 x float> %1) // CHECK: ret float %dx.dot -float test_dot_float3 ( float3 p0, float3 p1 ) { - return dot ( p0, p1 ); -} +float test_dot_float3(float3 p0, float3 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call float @llvm.dx.dot.v4f32(<4 x float> %0, <4 x float> %1) // CHECK: ret float 
%dx.dot -float test_dot_float4 ( float4 p0, float4 p1) { - return dot ( p0, p1 ); -} +float test_dot_float4(float4 p0, float4 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call float @llvm.dx.dot.v2f32(<2 x float> %splat.splat, <2 x float> %1) // CHECK: ret float %dx.dot -float test_dot_float2_splat ( float p0, float2 p1 ) { - return dot( p0, p1 ); -} +float test_dot_float2_splat(float p0, float2 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call float @llvm.dx.dot.v3f32(<3 x float> %splat.splat, <3 x float> %1) // CHECK: ret float %dx.dot -float test_dot_float3_splat ( float p0, float3 p1 ) { - return dot( p0, p1 ); -} +float test_dot_float3_splat(float p0, float3 p1) { return dot(p0, p1); } // CHECK: %dx.dot = call float @llvm.dx.dot.v4f32(<4 x float> %splat.splat, <4 x float> %1) // CHECK: ret float %dx.dot -float test_dot_float4_splat ( float p0, float4 p1 ) { - return dot( p0, p1 ); -} +float test_dot_float4_splat(float p0, float4 p1) { return dot(p0, p1); } // CHECK: %conv = sitofp i32 %1 to float // CHECK: %splat.splatinsert = insertelement <2 x float> poison, float %conv, i64 0 // CHECK: %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer // CHECK: %dx.dot = call float @llvm.dx.dot.v2f32(<2 x float> %0, <2 x float> %splat.splat) // CHECK: ret float %dx.dot -float test_builtin_dot_float2_int_splat ( float2 p0, int p1 ) { - return dot ( p0, p1 ); +float test_builtin_dot_float2_int_splat(float2 p0, int p1) { + return dot(p0, p1); } // CHECK: %conv = sitofp i32 %1 to float @@ -240,26 +170,24 @@ float test_builtin_dot_float2_int_splat ( float2 p0, int p1 ) { // CHECK: %splat.splat = shufflevector <3 x float> %splat.splatinsert, <3 x float> poison, <3 x i32> zeroinitializer // CHECK: %dx.dot = call float @llvm.dx.dot.v3f32(<3 x float> %0, <3 x float> %splat.splat) // CHECK: ret float %dx.dot -float test_builtin_dot_float3_int_splat ( float3 p0, int p1 ) { - return dot ( p0, p1 ); +float 
test_builtin_dot_float3_int_splat(float3 p0, int p1) { + return dot(p0, p1); } // CHECK: %dx.dot = fmul double %0, %1 // CHECK: ret double %dx.dot -double test_dot_double ( double p0, double p1 ) { - return dot ( p0, p1 ); -} +double test_dot_double(double p0, double p1) { return dot(p0, p1); } // CHECK: %conv = zext i1 %tobool to i32 // CHECK: %dx.dot = mul i32 %conv, %1 // CHECK: ret i32 %dx.dot -int test_dot_bool_scalar_arg0_type_promotion ( bool p0, int p1 ) { - return dot ( p0, p1 ); +int test_dot_bool_scalar_arg0_type_promotion(bool p0, int p1) { + return dot(p0, p1); } // CHECK: %conv = zext i1 %tobool to i32 // CHECK: %dx.dot = mul i32 %0, %conv // CHECK: ret i32 %dx.dot -int test_dot_bool_scalar_arg1_type_promotion ( int p0, bool p1 ) { - return dot ( p0, p1 ); +int test_dot_bool_scalar_arg1_type_promotion(int p0, bool p1) { + return dot(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/floor.hlsl b/clang/test/CodeGenHLSL/builtins/floor.hlsl index 357661761b762a..d2a2f6e52f1ec3 100644 --- a/clang/test/CodeGenHLSL/builtins/floor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/floor.hlsl @@ -1,79 +1,56 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF using hlsl::floor; -// CHECK: define noundef half @ -// CHECK: call half @llvm.floor.f16( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.floor.f16( // NO_HALF: define noundef float @"?test_floor_half@@YA$halff@$halff@@Z"( // NO_HALF: call float @llvm.floor.f32(float 
%0) -half test_floor_half ( half p0 ) { - return floor ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.floor.v2f16( +half test_floor_half(half p0) { return floor(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.floor.v2f16( // NO_HALF: define noundef <2 x float> @"?test_floor_half2@@YAT?$__vector@$halff@$01@__clang@@T12@@Z"( // NO_HALF: call <2 x float> @llvm.floor.v2f32( -half2 test_floor_half2 ( half2 p0 ) { - return floor ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.floor.v3f16( +half2 test_floor_half2(half2 p0) { return floor(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.floor.v3f16( // NO_HALF: define noundef <3 x float> @"?test_floor_half3@@YAT?$__vector@$halff@$02@__clang@@T12@@Z"( // NO_HALF: call <3 x float> @llvm.floor.v3f32( -half3 test_floor_half3 ( half3 p0 ) { - return floor ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.floor.v4f16( +half3 test_floor_half3(half3 p0) { return floor(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.floor.v4f16( // NO_HALF: define noundef <4 x float> @"?test_floor_half4@@YAT?$__vector@$halff@$03@__clang@@T12@@Z"( // NO_HALF: call <4 x float> @llvm.floor.v4f32( -half4 test_floor_half4 ( half4 p0 ) { - return floor ( p0 ); -} +half4 test_floor_half4(half4 p0) { return floor(p0); } // CHECK: define noundef float @ // CHECK: call float @llvm.floor.f32( -float test_floor_float ( float p0 ) { - return floor ( p0 ); -} +float test_floor_float(float p0) { return floor(p0); } // CHECK: define noundef <2 x float> @ // CHECK: call <2 x float> @llvm.floor.v2f32( -float2 test_floor_float2 ( float2 p0 ) { - return floor ( p0 ); -} +float2 test_floor_float2(float2 p0) { return floor(p0); } // CHECK: define noundef <3 x float> @ // CHECK: call <3 x float> @llvm.floor.v3f32( -float3 test_floor_float3 ( 
float3 p0 ) { - return floor ( p0 ); -} +float3 test_floor_float3(float3 p0) { return floor(p0); } // CHECK: define noundef <4 x float> @ // CHECK: call <4 x float> @llvm.floor.v4f32( -float4 test_floor_float4 ( float4 p0 ) { - return floor ( p0 ); -} +float4 test_floor_float4(float4 p0) { return floor(p0); } // CHECK: define noundef double @ // CHECK: call double @llvm.floor.f64( -double test_floor_double ( double p0 ) { - return floor ( p0 ); -} +double test_floor_double(double p0) { return floor(p0); } // CHECK: define noundef <2 x double> @ // CHECK: call <2 x double> @llvm.floor.v2f64( -double2 test_floor_double2 ( double2 p0 ) { - return floor ( p0 ); -} +double2 test_floor_double2(double2 p0) { return floor(p0); } // CHECK: define noundef <3 x double> @ // CHECK: call <3 x double> @llvm.floor.v3f64( -double3 test_floor_double3 ( double3 p0 ) { - return floor ( p0 ); -} +double3 test_floor_double3(double3 p0) { return floor(p0); } // CHECK: define noundef <4 x double> @ // CHECK: call <4 x double> @llvm.floor.v4f64( -double4 test_floor_double4 ( double4 p0 ) { - return floor ( p0 ); -} +double4 test_floor_double4(double4 p0) { return floor(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/frac.hlsl b/clang/test/CodeGenHLSL/builtins/frac.hlsl new file mode 100644 index 00000000000000..7c4d1468e96d27 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/frac.hlsl @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF + +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: %dx.frac = call half @llvm.dx.frac.f16( +// NATIVE_HALF: ret half %dx.frac +// NO_HALF: define 
noundef float @"?test_frac_half@@YA$halff@$halff@@Z"( +// NO_HALF: %dx.frac = call float @llvm.dx.frac.f32( +// NO_HALF: ret float %dx.frac +half test_frac_half(half p0) { return frac(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: %dx.frac = call <2 x half> @llvm.dx.frac.v2f16 +// NATIVE_HALF: ret <2 x half> %dx.frac +// NO_HALF: define noundef <2 x float> @ +// NO_HALF: %dx.frac = call <2 x float> @llvm.dx.frac.v2f32( +// NO_HALF: ret <2 x float> %dx.frac +half2 test_frac_half2(half2 p0) { return frac(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: %dx.frac = call <3 x half> @llvm.dx.frac.v3f16 +// NATIVE_HALF: ret <3 x half> %dx.frac +// NO_HALF: define noundef <3 x float> @ +// NO_HALF: %dx.frac = call <3 x float> @llvm.dx.frac.v3f32( +// NO_HALF: ret <3 x float> %dx.frac +half3 test_frac_half3(half3 p0) { return frac(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: %dx.frac = call <4 x half> @llvm.dx.frac.v4f16 +// NATIVE_HALF: ret <4 x half> %dx.frac +// NO_HALF: define noundef <4 x float> @ +// NO_HALF: %dx.frac = call <4 x float> @llvm.dx.frac.v4f32( +// NO_HALF: ret <4 x float> %dx.frac +half4 test_frac_half4(half4 p0) { return frac(p0); } + +// CHECK: define noundef float @ +// CHECK: %dx.frac = call float @llvm.dx.frac.f32( +// CHECK: ret float %dx.frac +float test_frac_float(float p0) { return frac(p0); } +// CHECK: define noundef <2 x float> @ +// CHECK: %dx.frac = call <2 x float> @llvm.dx.frac.v2f32 +// CHECK: ret <2 x float> %dx.frac +float2 test_frac_float2(float2 p0) { return frac(p0); } +// CHECK: define noundef <3 x float> @ +// CHECK: %dx.frac = call <3 x float> @llvm.dx.frac.v3f32 +// CHECK: ret <3 x float> %dx.frac +float3 test_frac_float3(float3 p0) { return frac(p0); } +// CHECK: define noundef <4 x float> @ +// CHECK: %dx.frac = call <4 x float> @llvm.dx.frac.v4f32 +// CHECK: ret <4 x float> %dx.frac +float4 test_frac_float4(float4 p0) { return frac(p0); } diff --git 
a/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl new file mode 100644 index 00000000000000..1f16dec68212e4 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/lerp-builtin.hlsl @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s + + + +// CHECK-LABEL: builtin_lerp_half_scalar +// CHECK: %3 = fsub double %conv1, %conv +// CHECK: %4 = fmul double %conv2, %3 +// CHECK: %dx.lerp = fadd double %conv, %4 +// CHECK: %conv3 = fptrunc double %dx.lerp to half +// CHECK: ret half %conv3 +half builtin_lerp_half_scalar (half p0) { + return __builtin_hlsl_lerp ( p0, p0, p0 ); +} + +// CHECK-LABEL: builtin_lerp_float_scalar +// CHECK: %3 = fsub double %conv1, %conv +// CHECK: %4 = fmul double %conv2, %3 +// CHECK: %dx.lerp = fadd double %conv, %4 +// CHECK: %conv3 = fptrunc double %dx.lerp to float +// CHECK: ret float %conv3 +float builtin_lerp_float_scalar ( float p0) { + return __builtin_hlsl_lerp ( p0, p0, p0 ); +} + +// CHECK-LABEL: builtin_lerp_half_vector +// CHECK: %dx.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2) +// CHECK: ret <3 x half> %dx.lerp +half3 builtin_lerp_half_vector (half3 p0) { + return __builtin_hlsl_lerp ( p0, p0, p0 ); +} + +// CHECK-LABEL: builtin_lerp_floar_vector +// CHECK: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) +// CHECK: ret <2 x float> %dx.lerp +float2 builtin_lerp_floar_vector ( float2 p0) { + return __builtin_hlsl_lerp ( p0, p0, p0 ); +} diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl new file mode 100644 index 00000000000000..a6b3d9643d674c --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: 
dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF + +// NATIVE_HALF: %3 = fsub half %1, %0 +// NATIVE_HALF: %4 = fmul half %2, %3 +// NATIVE_HALF: %dx.lerp = fadd half %0, %4 +// NATIVE_HALF: ret half %dx.lerp +// NO_HALF: %3 = fsub float %1, %0 +// NO_HALF: %4 = fmul float %2, %3 +// NO_HALF: %dx.lerp = fadd float %0, %4 +// NO_HALF: ret float %dx.lerp +half test_lerp_half(half p0) { return lerp(p0, p0, p0); } + +// NATIVE_HALF: %dx.lerp = call <2 x half> @llvm.dx.lerp.v2f16(<2 x half> %0, <2 x half> %1, <2 x half> %2) +// NATIVE_HALF: ret <2 x half> %dx.lerp +// NO_HALF: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) +// NO_HALF: ret <2 x float> %dx.lerp +half2 test_lerp_half2(half2 p0, half2 p1) { return lerp(p0, p0, p0); } + +// NATIVE_HALF: %dx.lerp = call <3 x half> @llvm.dx.lerp.v3f16(<3 x half> %0, <3 x half> %1, <3 x half> %2) +// NATIVE_HALF: ret <3 x half> %dx.lerp +// NO_HALF: %dx.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2) +// NO_HALF: ret <3 x float> %dx.lerp +half3 test_lerp_half3(half3 p0, half3 p1) { return lerp(p0, p0, p0); } + +// NATIVE_HALF: %dx.lerp = call <4 x half> @llvm.dx.lerp.v4f16(<4 x half> %0, <4 x half> %1, <4 x half> %2) +// NATIVE_HALF: ret <4 x half> %dx.lerp +// NO_HALF: %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2) +// NO_HALF: ret <4 x float> %dx.lerp +half4 test_lerp_half4(half4 p0, half4 p1) { return lerp(p0, p0, p0); } + +// CHECK: %3 = fsub float %1, %0 +// CHECK: %4 = fmul float %2, %3 +// CHECK: %dx.lerp = fadd float %0, %4 +// CHECK: ret float %dx.lerp +float 
test_lerp_float(float p0, float p1) { return lerp(p0, p0, p0); } + +// CHECK: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %2) +// CHECK: ret <2 x float> %dx.lerp +float2 test_lerp_float2(float2 p0, float2 p1) { return lerp(p0, p0, p0); } + +// CHECK: %dx.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %2) +// CHECK: ret <3 x float> %dx.lerp +float3 test_lerp_float3(float3 p0, float3 p1) { return lerp(p0, p0, p0); } + +// CHECK: %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2) +// CHECK: ret <4 x float> %dx.lerp +float4 test_lerp_float4(float4 p0, float4 p1) { return lerp(p0, p0, p0); } + +// CHECK: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %splat.splat, <2 x float> %1, <2 x float> %2) +// CHECK: ret <2 x float> %dx.lerp +float2 test_lerp_float2_splat(float p0, float2 p1) { return lerp(p0, p1, p1); } + +// CHECK: %dx.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %splat.splat, <3 x float> %1, <3 x float> %2) +// CHECK: ret <3 x float> %dx.lerp +float3 test_lerp_float3_splat(float p0, float3 p1) { return lerp(p0, p1, p1); } + +// CHECK: %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %splat.splat, <4 x float> %1, <4 x float> %2) +// CHECK: ret <4 x float> %dx.lerp +float4 test_lerp_float4_splat(float p0, float4 p1) { return lerp(p0, p1, p1); } + +// CHECK: %conv = sitofp i32 %2 to float +// CHECK: %splat.splatinsert = insertelement <2 x float> poison, float %conv, i64 0 +// CHECK: %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer +// CHECK: %dx.lerp = call <2 x float> @llvm.dx.lerp.v2f32(<2 x float> %0, <2 x float> %1, <2 x float> %splat.splat) +// CHECK: ret <2 x float> %dx.lerp +float2 test_lerp_float2_int_splat(float2 p0, int p1) { + return lerp(p0, p0, p1); +} + +// CHECK: %conv = sitofp i32 %2 to float +// CHECK: %splat.splatinsert = 
insertelement <3 x float> poison, float %conv, i64 0 +// CHECK: %splat.splat = shufflevector <3 x float> %splat.splatinsert, <3 x float> poison, <3 x i32> zeroinitializer +// CHECK: %dx.lerp = call <3 x float> @llvm.dx.lerp.v3f32(<3 x float> %0, <3 x float> %1, <3 x float> %splat.splat) +// CHECK: ret <3 x float> %dx.lerp +float3 test_lerp_float3_int_splat(float3 p0, int p1) { + return lerp(p0, p0, p1); +} diff --git a/clang/test/CodeGenHLSL/builtins/log.hlsl b/clang/test/CodeGenHLSL/builtins/log.hlsl index 6a8e4ac2e5f294..ecbdf1e98ac346 100644 --- a/clang/test/CodeGenHLSL/builtins/log.hlsl +++ b/clang/test/CodeGenHLSL/builtins/log.hlsl @@ -1,56 +1,41 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.log.f16( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.log.f16( // NO_HALF: define noundef float @"?test_log_half@@YA$halff@$halff@@Z"( // NO_HALF: call float @llvm.log.f32( -half test_log_half ( half p0 ) { - return log ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.log.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_log_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"( +half test_log_half(half p0) { return log(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.log.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_log_half2 // NO_HALF: call <2 x float> @llvm.log.v2f32( -half2 
test_log_half2 ( half2 p0 ) { - return log ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.log.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_log_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"( +half2 test_log_half2(half2 p0) { return log(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.log.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_log_half3 // NO_HALF: call <3 x float> @llvm.log.v3f32( -half3 test_log_half3 ( half3 p0 ) { - return log ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.log.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_log_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"( +half3 test_log_half3(half3 p0) { return log(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.log.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_log_half4 // NO_HALF: call <4 x float> @llvm.log.v4f32( -half4 test_log_half4 ( half4 p0 ) { - return log ( p0 ); -} +half4 test_log_half4(half4 p0) { return log(p0); } -// CHECK: define noundef float @ +// CHECK: define noundef float @"?test_log_float // CHECK: call float @llvm.log.f32( -float test_log_float ( float p0 ) { - return log ( p0 ); -} -// CHECK: define noundef <2 x float> @ +float test_log_float(float p0) { return log(p0); } +// CHECK: define noundef <2 x float> @"?test_log_float2 // CHECK: call <2 x float> @llvm.log.v2f32 -float2 test_log_float2 ( float2 p0 ) { - return log ( p0 ); -} -// CHECK: define noundef <3 x float> @ +float2 test_log_float2(float2 p0) { return log(p0); } +// CHECK: define noundef <3 x float> @"?test_log_float3 // CHECK: call <3 x float> @llvm.log.v3f32 -float3 test_log_float3 ( float3 p0 ) { - return log ( p0 ); -} -// CHECK: define noundef <4 x float> @ +float3 test_log_float3(float3 p0) { return log(p0); } +// CHECK: define noundef <4 x float> @"?test_log_float4 // CHECK: call <4 x float> @llvm.log.v4f32 -float4 
test_log_float4 ( float4 p0 ) { - return log ( p0 ); -} +float4 test_log_float4(float4 p0) { return log(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/log10.hlsl b/clang/test/CodeGenHLSL/builtins/log10.hlsl index 8ce24fd530dd3c..638b86e8d5eaf7 100644 --- a/clang/test/CodeGenHLSL/builtins/log10.hlsl +++ b/clang/test/CodeGenHLSL/builtins/log10.hlsl @@ -1,56 +1,41 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.log10.f16( -// NO_HALF: define noundef float @"?test_log10_half@@YA$halff@$halff@@Z"( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.log10.f16( +// NO_HALF: define noundef float @"?test_log10_half // NO_HALF: call float @llvm.log10.f32( -half test_log10_half ( half p0 ) { - return log10 ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.log10.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_log10_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"( +half test_log10_half(half p0) { return log10(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.log10.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_log10_half2 // NO_HALF: call <2 x float> @llvm.log10.v2f32( -half2 test_log10_half2 ( half2 p0 ) { - return log10 ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.log10.v3f16 -// NO_HALF: define noundef <3 x float> 
@"?test_log10_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"( +half2 test_log10_half2(half2 p0) { return log10(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.log10.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_log10_half3 // NO_HALF: call <3 x float> @llvm.log10.v3f32( -half3 test_log10_half3 ( half3 p0 ) { - return log10 ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.log10.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_log10_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"( +half3 test_log10_half3(half3 p0) { return log10(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.log10.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_log10_half4 // NO_HALF: call <4 x float> @llvm.log10.v4f32( -half4 test_log10_half4 ( half4 p0 ) { - return log10 ( p0 ); -} +half4 test_log10_half4(half4 p0) { return log10(p0); } -// CHECK: define noundef float @ +// CHECK: define noundef float @"?test_log10_float // CHECK: call float @llvm.log10.f32( -float test_log10_float ( float p0 ) { - return log10 ( p0 ); -} -// CHECK: define noundef <2 x float> @ +float test_log10_float(float p0) { return log10(p0); } +// CHECK: define noundef <2 x float> @"?test_log10_float2 // CHECK: call <2 x float> @llvm.log10.v2f32 -float2 test_log10_float2 ( float2 p0 ) { - return log10 ( p0 ); -} -// CHECK: define noundef <3 x float> @ +float2 test_log10_float2(float2 p0) { return log10(p0); } +// CHECK: define noundef <3 x float> @"?test_log10_float3 // CHECK: call <3 x float> @llvm.log10.v3f32 -float3 test_log10_float3 ( float3 p0 ) { - return log10 ( p0 ); -} -// CHECK: define noundef <4 x float> @ +float3 test_log10_float3(float3 p0) { return log10(p0); } +// CHECK: define noundef <4 x float> @"?test_log10_float4 // CHECK: call <4 x float> @llvm.log10.v4f32 -float4 test_log10_float4 ( float4 p0 ) { - return log10 ( p0 ); -} +float4 test_log10_float4(float4 p0) { return 
log10(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/log2.hlsl b/clang/test/CodeGenHLSL/builtins/log2.hlsl index f0f0a6c7c50e81..9ed8185a06b04f 100644 --- a/clang/test/CodeGenHLSL/builtins/log2.hlsl +++ b/clang/test/CodeGenHLSL/builtins/log2.hlsl @@ -1,56 +1,41 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.log2.f16( -// NO_HALF: define noundef float @"?test_log2_half@@YA$halff@$halff@@Z"( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.log2.f16( +// NO_HALF: define noundef float @"?test_log2_half // NO_HALF: call float @llvm.log2.f32( -half test_log2_half ( half p0 ) { - return log2 ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.log2.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_log2_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"( +half test_log2_half(half p0) { return log2(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.log2.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_log2_half2 // NO_HALF: call <2 x float> @llvm.log2.v2f32( -half2 test_log2_half2 ( half2 p0 ) { - return log2 ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.log2.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_log2_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"( +half2 test_log2_half2(half2 p0) { return log2(p0); } +// NATIVE_HALF: define noundef <3 x half> @ 
+// NATIVE_HALF: call <3 x half> @llvm.log2.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_log2_half3 // NO_HALF: call <3 x float> @llvm.log2.v3f32( -half3 test_log2_half3 ( half3 p0 ) { - return log2 ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.log2.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_log2_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"( +half3 test_log2_half3(half3 p0) { return log2(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.log2.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_log2_half4 // NO_HALF: call <4 x float> @llvm.log2.v4f32( -half4 test_log2_half4 ( half4 p0 ) { - return log2 ( p0 ); -} +half4 test_log2_half4(half4 p0) { return log2(p0); } -// CHECK: define noundef float @ +// CHECK: define noundef float @"?test_log2_float // CHECK: call float @llvm.log2.f32( -float test_log2_float ( float p0 ) { - return log2 ( p0 ); -} -// CHECK: define noundef <2 x float> @ +float test_log2_float(float p0) { return log2(p0); } +// CHECK: define noundef <2 x float> @"?test_log2_float2 // CHECK: call <2 x float> @llvm.log2.v2f32 -float2 test_log2_float2 ( float2 p0 ) { - return log2 ( p0 ); -} -// CHECK: define noundef <3 x float> @ +float2 test_log2_float2(float2 p0) { return log2(p0); } +// CHECK: define noundef <3 x float> @"?test_log2_float3 // CHECK: call <3 x float> @llvm.log2.v3f32 -float3 test_log2_float3 ( float3 p0 ) { - return log2 ( p0 ); -} -// CHECK: define noundef <4 x float> @ +float3 test_log2_float3(float3 p0) { return log2(p0); } +// CHECK: define noundef <4 x float> @"?test_log2_float4 // CHECK: call <4 x float> @llvm.log2.v4f32 -float4 test_log2_float4 ( float4 p0 ) { - return log2 ( p0 ); -} +float4 test_log2_float4(float4 p0) { return log2(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/max.hlsl b/clang/test/CodeGenHLSL/builtins/max.hlsl index d8879c3332fb88..272d1e8a10bd7c 100644 --- a/clang/test/CodeGenHLSL/builtins/max.hlsl +++ 
b/clang/test/CodeGenHLSL/builtins/max.hlsl @@ -1,206 +1,134 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF #ifdef __HLSL_ENABLE_16_BIT -// CHECK: define noundef i16 @ -// CHECK: call i16 @llvm.smax.i16( -int16_t test_max_short ( int16_t p0, int16_t p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <2 x i16> @ -// CHECK: call <2 x i16> @llvm.smax.v2i16( -int16_t2 test_max_short2 ( int16_t2 p0, int16_t2 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <3 x i16> @ -// CHECK: call <3 x i16> @llvm.smax.v3i16 -int16_t3 test_max_short3 ( int16_t3 p0, int16_t3 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <4 x i16> @ -// CHECK: call <4 x i16> @llvm.smax.v4i16 -int16_t4 test_max_short4 ( int16_t4 p0, int16_t4 p1 ) { - return max ( p0, p1 ); -} +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 @llvm.smax.i16( +int16_t test_max_short(int16_t p0, int16_t p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.smax.v2i16( +int16_t2 test_max_short2(int16_t2 p0, int16_t2 p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <3 x i16> @ +// NATIVE_HALF: call <3 x i16> @llvm.smax.v3i16 +int16_t3 test_max_short3(int16_t3 p0, int16_t3 p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <4 x i16> @ +// NATIVE_HALF: call <4 x i16> @llvm.smax.v4i16 +int16_t4 test_max_short4(int16_t4 p0, int16_t4 p1) { return max(p0, p1); } -// CHECK: define noundef 
i16 @ -// CHECK: call i16 @llvm.umax.i16( -uint16_t test_max_ushort ( uint16_t p0, uint16_t p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <2 x i16> @ -// CHECK: call <2 x i16> @llvm.umax.v2i16 -uint16_t2 test_max_ushort2 ( uint16_t2 p0, uint16_t2 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <3 x i16> @ -// CHECK: call <3 x i16> @llvm.umax.v3i16 -uint16_t3 test_max_ushort3 ( uint16_t3 p0, uint16_t3 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <4 x i16> @ -// CHECK: call <4 x i16> @llvm.umax.v4i16 -uint16_t4 test_max_ushort4 ( uint16_t4 p0, uint16_t4 p1 ) { - return max ( p0, p1 ); -} +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 @llvm.umax.i16( +uint16_t test_max_ushort(uint16_t p0, uint16_t p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.umax.v2i16 +uint16_t2 test_max_ushort2(uint16_t2 p0, uint16_t2 p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <3 x i16> @ +// NATIVE_HALF: call <3 x i16> @llvm.umax.v3i16 +uint16_t3 test_max_ushort3(uint16_t3 p0, uint16_t3 p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <4 x i16> @ +// NATIVE_HALF: call <4 x i16> @llvm.umax.v4i16 +uint16_t4 test_max_ushort4(uint16_t4 p0, uint16_t4 p1) { return max(p0, p1); } #endif // CHECK: define noundef i32 @ // CHECK: call i32 @llvm.smax.i32( -int test_max_int ( int p0, int p1 ) { - return max ( p0, p1 ); -} +int test_max_int(int p0, int p1) { return max(p0, p1); } // CHECK: define noundef <2 x i32> @ // CHECK: call <2 x i32> @llvm.smax.v2i32 -int2 test_max_int2 ( int2 p0, int2 p1 ) { - return max ( p0, p1 ); -} +int2 test_max_int2(int2 p0, int2 p1) { return max(p0, p1); } // CHECK: define noundef <3 x i32> @ // CHECK: call <3 x i32> @llvm.smax.v3i32 -int3 test_max_int3 ( int3 p0, int3 p1 ) { - return max ( p0, p1 ); -} +int3 test_max_int3(int3 p0, int3 p1) { return max(p0, p1); } // CHECK: define noundef <4 x i32> @ // CHECK: call <4 x i32> 
@llvm.smax.v4i32 -int4 test_max_int4 ( int4 p0, int4 p1) { - return max ( p0, p1 ); -} +int4 test_max_int4(int4 p0, int4 p1) { return max(p0, p1); } // CHECK: define noundef i32 @ // CHECK: call i32 @llvm.umax.i32( -int test_max_uint ( uint p0, uint p1 ) { - return max ( p0, p1 ); -} +int test_max_uint(uint p0, uint p1) { return max(p0, p1); } // CHECK: define noundef <2 x i32> @ // CHECK: call <2 x i32> @llvm.umax.v2i32 -uint2 test_max_uint2 ( uint2 p0, uint2 p1 ) { - return max ( p0, p1 ); -} +uint2 test_max_uint2(uint2 p0, uint2 p1) { return max(p0, p1); } // CHECK: define noundef <3 x i32> @ // CHECK: call <3 x i32> @llvm.umax.v3i32 -uint3 test_max_uint3 ( uint3 p0, uint3 p1 ) { - return max ( p0, p1 ); -} +uint3 test_max_uint3(uint3 p0, uint3 p1) { return max(p0, p1); } // CHECK: define noundef <4 x i32> @ // CHECK: call <4 x i32> @llvm.umax.v4i32 -uint4 test_max_uint4 ( uint4 p0, uint4 p1) { - return max ( p0, p1 ); -} +uint4 test_max_uint4(uint4 p0, uint4 p1) { return max(p0, p1); } // CHECK: define noundef i64 @ // CHECK: call i64 @llvm.smax.i64( -int64_t test_max_long ( int64_t p0, int64_t p1 ) { - return max ( p0, p1 ); -} +int64_t test_max_long(int64_t p0, int64_t p1) { return max(p0, p1); } // CHECK: define noundef <2 x i64> @ // CHECK: call <2 x i64> @llvm.smax.v2i64 -int64_t2 test_max_long2 ( int64_t2 p0, int64_t2 p1 ) { - return max ( p0, p1 ); -} +int64_t2 test_max_long2(int64_t2 p0, int64_t2 p1) { return max(p0, p1); } // CHECK: define noundef <3 x i64> @ // CHECK: call <3 x i64> @llvm.smax.v3i64 -int64_t3 test_max_long3 ( int64_t3 p0, int64_t3 p1 ) { - return max ( p0, p1 ); -} +int64_t3 test_max_long3(int64_t3 p0, int64_t3 p1) { return max(p0, p1); } // CHECK: define noundef <4 x i64> @ // CHECK: call <4 x i64> @llvm.smax.v4i64 -int64_t4 test_max_long4 ( int64_t4 p0, int64_t4 p1) { - return max ( p0, p1 ); -} +int64_t4 test_max_long4(int64_t4 p0, int64_t4 p1) { return max(p0, p1); } // CHECK: define noundef i64 @ // CHECK: call i64 
@llvm.umax.i64( -uint64_t test_max_long ( uint64_t p0, uint64_t p1 ) { - return max ( p0, p1 ); -} +uint64_t test_max_long(uint64_t p0, uint64_t p1) { return max(p0, p1); } // CHECK: define noundef <2 x i64> @ // CHECK: call <2 x i64> @llvm.umax.v2i64 -uint64_t2 test_max_long2 ( uint64_t2 p0, uint64_t2 p1 ) { - return max ( p0, p1 ); -} +uint64_t2 test_max_long2(uint64_t2 p0, uint64_t2 p1) { return max(p0, p1); } // CHECK: define noundef <3 x i64> @ // CHECK: call <3 x i64> @llvm.umax.v3i64 -uint64_t3 test_max_long3 ( uint64_t3 p0, uint64_t3 p1 ) { - return max ( p0, p1 ); -} +uint64_t3 test_max_long3(uint64_t3 p0, uint64_t3 p1) { return max(p0, p1); } // CHECK: define noundef <4 x i64> @ // CHECK: call <4 x i64> @llvm.umax.v4i64 -uint64_t4 test_max_long4 ( uint64_t4 p0, uint64_t4 p1) { - return max ( p0, p1 ); -} +uint64_t4 test_max_long4(uint64_t4 p0, uint64_t4 p1) { return max(p0, p1); } - -// CHECK: define noundef half @ -// CHECK: call half @llvm.maxnum.f16( -// NO_HALF: define noundef float @"?test_max_half@@YA$halff@$halff@0@Z"( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.maxnum.f16( +// NO_HALF: define noundef float @"?test_max_half // NO_HALF: call float @llvm.maxnum.f32( -half test_max_half ( half p0, half p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.maxnum.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_max_float2@@YAT?$__vector@M$01@__clang@@T12@0@Z"( +half test_max_half(half p0, half p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.maxnum.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_max_half2 // NO_HALF: call <2 x float> @llvm.maxnum.v2f32( -half2 test_max_half2 ( half2 p0, half2 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.maxnum.v3f16 -// NO_HALF: define noundef <3 x float> 
@"?test_max_float3@@YAT?$__vector@M$02@__clang@@T12@0@Z"( +half2 test_max_half2(half2 p0, half2 p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.maxnum.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_max_half3 // NO_HALF: call <3 x float> @llvm.maxnum.v3f32( -half3 test_max_half3 ( half3 p0, half3 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.maxnum.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_max_float4@@YAT?$__vector@M$03@__clang@@T12@0@Z"( +half3 test_max_half3(half3 p0, half3 p1) { return max(p0, p1); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.maxnum.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_max_half4 // NO_HALF: call <4 x float> @llvm.maxnum.v4f32( -half4 test_max_half4 ( half4 p0, half4 p1 ) { - return max ( p0, p1 ); -} +half4 test_max_half4(half4 p0, half4 p1) { return max(p0, p1); } -// CHECK: define noundef float @ +// CHECK: define noundef float @"?test_max_float // CHECK: call float @llvm.maxnum.f32( -float test_max_float ( float p0, float p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <2 x float> @ +float test_max_float(float p0, float p1) { return max(p0, p1); } +// CHECK: define noundef <2 x float> @"?test_max_float2 // CHECK: call <2 x float> @llvm.maxnum.v2f32 -float2 test_max_float2 ( float2 p0, float2 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <3 x float> @ +float2 test_max_float2(float2 p0, float2 p1) { return max(p0, p1); } +// CHECK: define noundef <3 x float> @"?test_max_float3 // CHECK: call <3 x float> @llvm.maxnum.v3f32 -float3 test_max_float3 ( float3 p0, float3 p1 ) { - return max ( p0, p1 ); -} -// CHECK: define noundef <4 x float> @ +float3 test_max_float3(float3 p0, float3 p1) { return max(p0, p1); } +// CHECK: define noundef <4 x float> @"?test_max_float4 // CHECK: call <4 x float> @llvm.maxnum.v4f32 -float4 
test_max_float4 ( float4 p0, float4 p1) { - return max ( p0, p1 ); -} +float4 test_max_float4(float4 p0, float4 p1) { return max(p0, p1); } // CHECK: define noundef double @ // CHECK: call double @llvm.maxnum.f64( -double test_max_double ( double p0, double p1 ) { - return max ( p0, p1 ); -} +double test_max_double(double p0, double p1) { return max(p0, p1); } // CHECK: define noundef <2 x double> @ // CHECK: call <2 x double> @llvm.maxnum.v2f64 -double2 test_max_double2 ( double2 p0, double2 p1 ) { - return max ( p0, p1 ); -} +double2 test_max_double2(double2 p0, double2 p1) { return max(p0, p1); } // CHECK: define noundef <3 x double> @ // CHECK: call <3 x double> @llvm.maxnum.v3f64 -double3 test_max_double3 ( double3 p0, double3 p1 ) { - return max ( p0, p1 ); -} +double3 test_max_double3(double3 p0, double3 p1) { return max(p0, p1); } // CHECK: define noundef <4 x double> @ // CHECK: call <4 x double> @llvm.maxnum.v4f64 -double4 test_max_double4 ( double4 p0, double4 p1) { - return max ( p0, p1 ); -} +double4 test_max_double4(double4 p0, double4 p1) { return max(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/min.hlsl b/clang/test/CodeGenHLSL/builtins/min.hlsl index 743053cbdd2620..a0c233dac4d5fc 100644 --- a/clang/test/CodeGenHLSL/builtins/min.hlsl +++ b/clang/test/CodeGenHLSL/builtins/min.hlsl @@ -1,207 +1,134 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF #ifdef __HLSL_ENABLE_16_BIT -// CHECK: define noundef i16 @ -// CHECK: call 
i16 @llvm.smin.i16( -int16_t test_min_short ( int16_t p0, int16_t p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <2 x i16> @ -// CHECK: call <2 x i16> @llvm.smin.v2i16( -int16_t2 test_min_short2 ( int16_t2 p0, int16_t2 p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <3 x i16> @ -// CHECK: call <3 x i16> @llvm.smin.v3i16 -int16_t3 test_min_short3 ( int16_t3 p0, int16_t3 p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <4 x i16> @ -// CHECK: call <4 x i16> @llvm.smin.v4i16 -int16_t4 test_min_short4 ( int16_t4 p0, int16_t4 p1 ) { - return min ( p0, p1 ); -} +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 @llvm.smin.i16( +int16_t test_min_short(int16_t p0, int16_t p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.smin.v2i16( +int16_t2 test_min_short2(int16_t2 p0, int16_t2 p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <3 x i16> @ +// NATIVE_HALF: call <3 x i16> @llvm.smin.v3i16 +int16_t3 test_min_short3(int16_t3 p0, int16_t3 p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <4 x i16> @ +// NATIVE_HALF: call <4 x i16> @llvm.smin.v4i16 +int16_t4 test_min_short4(int16_t4 p0, int16_t4 p1) { return min(p0, p1); } - -// CHECK: define noundef i16 @ -// CHECK: call i16 @llvm.umin.i16( -uint16_t test_min_ushort ( uint16_t p0, uint16_t p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <2 x i16> @ -// CHECK: call <2 x i16> @llvm.umin.v2i16 -uint16_t2 test_min_ushort2 ( uint16_t2 p0, uint16_t2 p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <3 x i16> @ -// CHECK: call <3 x i16> @llvm.umin.v3i16 -uint16_t3 test_min_ushort3 ( uint16_t3 p0, uint16_t3 p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <4 x i16> @ -// CHECK: call <4 x i16> @llvm.umin.v4i16 -uint16_t4 test_min_ushort4 ( uint16_t4 p0, uint16_t4 p1 ) { - return min ( p0, p1 ); -} +// NATIVE_HALF: define noundef i16 @ +// NATIVE_HALF: call i16 
@llvm.umin.i16( +uint16_t test_min_ushort(uint16_t p0, uint16_t p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <2 x i16> @ +// NATIVE_HALF: call <2 x i16> @llvm.umin.v2i16 +uint16_t2 test_min_ushort2(uint16_t2 p0, uint16_t2 p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <3 x i16> @ +// NATIVE_HALF: call <3 x i16> @llvm.umin.v3i16 +uint16_t3 test_min_ushort3(uint16_t3 p0, uint16_t3 p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <4 x i16> @ +// NATIVE_HALF: call <4 x i16> @llvm.umin.v4i16 +uint16_t4 test_min_ushort4(uint16_t4 p0, uint16_t4 p1) { return min(p0, p1); } #endif // CHECK: define noundef i32 @ // CHECK: call i32 @llvm.smin.i32( -int test_min_int ( int p0, int p1 ) { - return min ( p0, p1 ); -} +int test_min_int(int p0, int p1) { return min(p0, p1); } // CHECK: define noundef <2 x i32> @ // CHECK: call <2 x i32> @llvm.smin.v2i32 -int2 test_min_int2 ( int2 p0, int2 p1 ) { - return min ( p0, p1 ); -} +int2 test_min_int2(int2 p0, int2 p1) { return min(p0, p1); } // CHECK: define noundef <3 x i32> @ // CHECK: call <3 x i32> @llvm.smin.v3i32 -int3 test_min_int3 ( int3 p0, int3 p1 ) { - return min ( p0, p1 ); -} +int3 test_min_int3(int3 p0, int3 p1) { return min(p0, p1); } // CHECK: define noundef <4 x i32> @ // CHECK: call <4 x i32> @llvm.smin.v4i32 -int4 test_min_int4 ( int4 p0, int4 p1) { - return min ( p0, p1 ); -} +int4 test_min_int4(int4 p0, int4 p1) { return min(p0, p1); } // CHECK: define noundef i32 @ // CHECK: call i32 @llvm.umin.i32( -int test_min_uint ( uint p0, uint p1 ) { - return min ( p0, p1 ); -} +int test_min_uint(uint p0, uint p1) { return min(p0, p1); } // CHECK: define noundef <2 x i32> @ // CHECK: call <2 x i32> @llvm.umin.v2i32 -uint2 test_min_uint2 ( uint2 p0, uint2 p1 ) { - return min ( p0, p1 ); -} +uint2 test_min_uint2(uint2 p0, uint2 p1) { return min(p0, p1); } // CHECK: define noundef <3 x i32> @ // CHECK: call <3 x i32> @llvm.umin.v3i32 -uint3 test_min_uint3 ( uint3 p0, uint3 p1 ) { - return min 
( p0, p1 ); -} +uint3 test_min_uint3(uint3 p0, uint3 p1) { return min(p0, p1); } // CHECK: define noundef <4 x i32> @ // CHECK: call <4 x i32> @llvm.umin.v4i32 -uint4 test_min_uint4 ( uint4 p0, uint4 p1) { - return min ( p0, p1 ); -} +uint4 test_min_uint4(uint4 p0, uint4 p1) { return min(p0, p1); } // CHECK: define noundef i64 @ // CHECK: call i64 @llvm.smin.i64( -int64_t test_min_long ( int64_t p0, int64_t p1 ) { - return min ( p0, p1 ); -} +int64_t test_min_long(int64_t p0, int64_t p1) { return min(p0, p1); } // CHECK: define noundef <2 x i64> @ // CHECK: call <2 x i64> @llvm.smin.v2i64 -int64_t2 test_min_long2 ( int64_t2 p0, int64_t2 p1 ) { - return min ( p0, p1 ); -} +int64_t2 test_min_long2(int64_t2 p0, int64_t2 p1) { return min(p0, p1); } // CHECK: define noundef <3 x i64> @ // CHECK: call <3 x i64> @llvm.smin.v3i64 -int64_t3 test_min_long3 ( int64_t3 p0, int64_t3 p1 ) { - return min ( p0, p1 ); -} +int64_t3 test_min_long3(int64_t3 p0, int64_t3 p1) { return min(p0, p1); } // CHECK: define noundef <4 x i64> @ // CHECK: call <4 x i64> @llvm.smin.v4i64 -int64_t4 test_min_long4 ( int64_t4 p0, int64_t4 p1) { - return min ( p0, p1 ); -} +int64_t4 test_min_long4(int64_t4 p0, int64_t4 p1) { return min(p0, p1); } // CHECK: define noundef i64 @ // CHECK: call i64 @llvm.umin.i64( -uint64_t test_min_long ( uint64_t p0, uint64_t p1 ) { - return min ( p0, p1 ); -} +uint64_t test_min_long(uint64_t p0, uint64_t p1) { return min(p0, p1); } // CHECK: define noundef <2 x i64> @ // CHECK: call <2 x i64> @llvm.umin.v2i64 -uint64_t2 test_min_long2 ( uint64_t2 p0, uint64_t2 p1 ) { - return min ( p0, p1 ); -} +uint64_t2 test_min_long2(uint64_t2 p0, uint64_t2 p1) { return min(p0, p1); } // CHECK: define noundef <3 x i64> @ // CHECK: call <3 x i64> @llvm.umin.v3i64 -uint64_t3 test_min_long3 ( uint64_t3 p0, uint64_t3 p1 ) { - return min ( p0, p1 ); -} +uint64_t3 test_min_long3(uint64_t3 p0, uint64_t3 p1) { return min(p0, p1); } // CHECK: define noundef <4 x i64> @ // CHECK: call <4 x 
i64> @llvm.umin.v4i64 -uint64_t4 test_min_long4 ( uint64_t4 p0, uint64_t4 p1) { - return min ( p0, p1 ); -} - +uint64_t4 test_min_long4(uint64_t4 p0, uint64_t4 p1) { return min(p0, p1); } -// CHECK: define noundef half @ -// CHECK: call half @llvm.minnum.f16( -// NO_HALF: define noundef float @"?test_min_half@@YA$halff@$halff@0@Z"( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.minnum.f16( +// NO_HALF: define noundef float @"?test_min_half // NO_HALF: call float @llvm.minnum.f32( -half test_min_half ( half p0, half p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.minnum.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_min_float2@@YAT?$__vector@M$01@__clang@@T12@0@Z"( +half test_min_half(half p0, half p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.minnum.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_min_half2 // NO_HALF: call <2 x float> @llvm.minnum.v2f32( -half2 test_min_half2 ( half2 p0, half2 p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.minnum.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_min_float3@@YAT?$__vector@M$02@__clang@@T12@0@Z"( +half2 test_min_half2(half2 p0, half2 p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.minnum.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_min_half3 // NO_HALF: call <3 x float> @llvm.minnum.v3f32( -half3 test_min_half3 ( half3 p0, half3 p1 ) { - return min ( p0, p1 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.minnum.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_min_float4@@YAT?$__vector@M$03@__clang@@T12@0@Z"( +half3 test_min_half3(half3 p0, half3 p1) { return min(p0, p1); } +// NATIVE_HALF: define noundef <4 x half> @ +// NATIVE_HALF: call <4 x half> @llvm.minnum.v4f16 +// NO_HALF: define 
noundef <4 x float> @"?test_min_half4 // NO_HALF: call <4 x float> @llvm.minnum.v4f32( -half4 test_min_half4 ( half4 p0, half4 p1 ) { - return min ( p0, p1 ); -} +half4 test_min_half4(half4 p0, half4 p1) { return min(p0, p1); } // CHECK: define noundef float @ // CHECK: call float @llvm.minnum.f32( -float test_min_float ( float p0, float p1 ) { - return min ( p0, p1 ); -} +float test_min_float(float p0, float p1) { return min(p0, p1); } // CHECK: define noundef <2 x float> @ // CHECK: call <2 x float> @llvm.minnum.v2f32 -float2 test_min_float2 ( float2 p0, float2 p1 ) { - return min ( p0, p1 ); -} +float2 test_min_float2(float2 p0, float2 p1) { return min(p0, p1); } // CHECK: define noundef <3 x float> @ // CHECK: call <3 x float> @llvm.minnum.v3f32 -float3 test_min_float3 ( float3 p0, float3 p1 ) { - return min ( p0, p1 ); -} +float3 test_min_float3(float3 p0, float3 p1) { return min(p0, p1); } // CHECK: define noundef <4 x float> @ // CHECK: call <4 x float> @llvm.minnum.v4f32 -float4 test_min_float4 ( float4 p0, float4 p1) { - return min ( p0, p1 ); -} +float4 test_min_float4(float4 p0, float4 p1) { return min(p0, p1); } // CHECK: define noundef double @ // CHECK: call double @llvm.minnum.f64( -double test_min_double ( double p0, double p1 ) { - return min ( p0, p1 ); -} +double test_min_double(double p0, double p1) { return min(p0, p1); } // CHECK: define noundef <2 x double> @ // CHECK: call <2 x double> @llvm.minnum.v2f64 -double2 test_min_double2 ( double2 p0, double2 p1 ) { - return min ( p0, p1 ); -} +double2 test_min_double2(double2 p0, double2 p1) { return min(p0, p1); } // CHECK: define noundef <3 x double> @ // CHECK: call <3 x double> @llvm.minnum.v3f64 -double3 test_min_double3 ( double3 p0, double3 p1 ) { - return min ( p0, p1 ); -} +double3 test_min_double3(double3 p0, double3 p1) { return min(p0, p1); } // CHECK: define noundef <4 x double> @ // CHECK: call <4 x double> @llvm.minnum.v4f64 -double4 test_min_double4 ( double4 p0, double4 p1) { - 
return min ( p0, p1 ); -} +double4 test_min_double4(double4 p0, double4 p1) { return min(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/pow.hlsl b/clang/test/CodeGenHLSL/builtins/pow.hlsl index 86bfe98058a6eb..e996ca2f336410 100644 --- a/clang/test/CodeGenHLSL/builtins/pow.hlsl +++ b/clang/test/CodeGenHLSL/builtins/pow.hlsl @@ -1,89 +1,54 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.pow.f16( -// NO_HALF: define noundef float @"?test_pow_half@@YA$halff@$halff@0@Z"( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.pow.f16( +// NO_HALF: define noundef float @"?test_pow_half // NO_HALF: call float @llvm.pow.f32( -half test_pow_half(half p0, half p1) -{ - return pow(p0, p1); -} -// CHECK: define noundef <2 x half> @"?test_pow_half2@@YAT?$__vector@$f16@$01@__clang@@T12@0@Z"( -// CHECK: call <2 x half> @llvm.pow.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_pow_float2@@YAT?$__vector@M$01@__clang@@T12@0@Z"( +half test_pow_half(half p0, half p1) { return pow(p0, p1); } +// NATIVE_HALF: define noundef <2 x half> @"?test_pow_half2 +// NATIVE_HALF: call <2 x half> @llvm.pow.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_pow_half2 // NO_HALF: call <2 x float> @llvm.pow.v2f32( -half2 test_pow_half2(half2 p0, half2 p1) -{ - return pow(p0, p1); -} -// CHECK: define noundef <3 x half> @"?test_pow_half3@@YAT?$__vector@$f16@$02@__clang@@T12@0@Z"( -// 
CHECK: call <3 x half> @llvm.pow.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_pow_float3@@YAT?$__vector@M$02@__clang@@T12@0@Z"( +half2 test_pow_half2(half2 p0, half2 p1) { return pow(p0, p1); } +// NATIVE_HALF: define noundef <3 x half> @"?test_pow_half3 +// NATIVE_HALF: call <3 x half> @llvm.pow.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_pow_half3 // NO_HALF: call <3 x float> @llvm.pow.v3f32( -half3 test_pow_half3(half3 p0, half3 p1) -{ - return pow(p0, p1); -} -// CHECK: define noundef <4 x half> @"?test_pow_half4@@YAT?$__vector@$f16@$03@__clang@@T12@0@Z"( -// CHECK: call <4 x half> @llvm.pow.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_pow_float4@@YAT?$__vector@M$03@__clang@@T12@0@Z"( +half3 test_pow_half3(half3 p0, half3 p1) { return pow(p0, p1); } +// NATIVE_HALF: define noundef <4 x half> @"?test_pow_half4 +// NATIVE_HALF: call <4 x half> @llvm.pow.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_pow_half4 // NO_HALF: call <4 x float> @llvm.pow.v4f32( -half4 test_pow_half4(half4 p0, half4 p1) -{ - return pow(p0, p1); -} +half4 test_pow_half4(half4 p0, half4 p1) { return pow(p0, p1); } -// CHECK: define noundef float @"?test_pow_float@@YAMMM@Z"( +// CHECK: define noundef float @"?test_pow_float // CHECK: call float @llvm.pow.f32( -float test_pow_float(float p0, float p1) -{ - return pow(p0, p1); -} -// CHECK: define noundef <2 x float> @"?test_pow_float2@@YAT?$__vector@M$01@__clang@@T12@0@Z"( +float test_pow_float(float p0, float p1) { return pow(p0, p1); } +// CHECK: define noundef <2 x float> @"?test_pow_float2 // CHECK: call <2 x float> @llvm.pow.v2f32 -float2 test_pow_float2(float2 p0, float2 p1) -{ - return pow(p0, p1); -} -// CHECK: define noundef <3 x float> @"?test_pow_float3@@YAT?$__vector@M$02@__clang@@T12@0@Z"( +float2 test_pow_float2(float2 p0, float2 p1) { return pow(p0, p1); } +// CHECK: define noundef <3 x float> @"?test_pow_float3 // CHECK: call <3 x float> @llvm.pow.v3f32 -float3 test_pow_float3(float3 p0, 
float3 p1) -{ - return pow(p0, p1); -} -// CHECK: define noundef <4 x float> @"?test_pow_float4@@YAT?$__vector@M$03@__clang@@T12@0@Z"( +float3 test_pow_float3(float3 p0, float3 p1) { return pow(p0, p1); } +// CHECK: define noundef <4 x float> @"?test_pow_float4 // CHECK: call <4 x float> @llvm.pow.v4f32 -float4 test_pow_float4(float4 p0, float4 p1) -{ - return pow(p0, p1); -} +float4 test_pow_float4(float4 p0, float4 p1) { return pow(p0, p1); } // CHECK: define noundef double @"?test_pow_double@@YANNN@Z"( // CHECK: call double @llvm.pow.f64( -double test_pow_double(double p0, double p1) -{ - return pow(p0, p1); -} +double test_pow_double(double p0, double p1) { return pow(p0, p1); } // CHECK: define noundef <2 x double> @"?test_pow_double2@@YAT?$__vector@N$01@__clang@@T12@0@Z"( // CHECK: call <2 x double> @llvm.pow.v2f64 -double2 test_pow_double2(double2 p0, double2 p1) -{ - return pow(p0, p1); -} +double2 test_pow_double2(double2 p0, double2 p1) { return pow(p0, p1); } // CHECK: define noundef <3 x double> @"?test_pow_double3@@YAT?$__vector@N$02@__clang@@T12@0@Z"( // CHECK: call <3 x double> @llvm.pow.v3f64 -double3 test_pow_double3(double3 p0, double3 p1) -{ - return pow(p0, p1); -} +double3 test_pow_double3(double3 p0, double3 p1) { return pow(p0, p1); } // CHECK: define noundef <4 x double> @"?test_pow_double4@@YAT?$__vector@N$03@__clang@@T12@0@Z"( // CHECK: call <4 x double> @llvm.pow.v4f64 -double4 test_pow_double4(double4 p0, double4 p1) -{ - return pow(p0, p1); -} +double4 test_pow_double4(double4 p0, double4 p1) { return pow(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/sin.hlsl b/clang/test/CodeGenHLSL/builtins/sin.hlsl index 2445e6063a7052..ffb52214913886 100644 --- a/clang/test/CodeGenHLSL/builtins/sin.hlsl +++ b/clang/test/CodeGenHLSL/builtins/sin.hlsl @@ -1,56 +1,41 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm 
-disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.sin.f16( +// NATIVE_HALF: define noundef half @ +// NATIVE_HALF: call half @llvm.sin.f16( // NO_HALF: define noundef float @"?test_sin_half@@YA$halff@$halff@@Z"( // NO_HALF: call float @llvm.sin.f32( -half test_sin_half ( half p0 ) { - return sin ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.sin.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_sin_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"( +half test_sin_half(half p0) { return sin(p0); } +// NATIVE_HALF: define noundef <2 x half> @ +// NATIVE_HALF: call <2 x half> @llvm.sin.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_sin_half2 // NO_HALF: call <2 x float> @llvm.sin.v2f32( -half2 test_sin_half2 ( half2 p0 ) { - return sin ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.sin.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_sin_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"( +half2 test_sin_half2(half2 p0) { return sin(p0); } +// NATIVE_HALF: define noundef <3 x half> @ +// NATIVE_HALF: call <3 x half> @llvm.sin.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_sin_half3 // NO_HALF: call <3 x float> @llvm.sin.v3f32( -half3 test_sin_half3 ( half3 p0 ) { - return sin ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.sin.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_sin_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"( +half3 test_sin_half3(half3 p0) { return sin(p0); } +// NATIVE_HALF: define noundef <4 x half> @ +// 
NATIVE_HALF: call <4 x half> @llvm.sin.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_sin_half4 // NO_HALF: call <4 x float> @llvm.sin.v4f32( -half4 test_sin_half4 ( half4 p0 ) { - return sin ( p0 ); -} +half4 test_sin_half4(half4 p0) { return sin(p0); } // CHECK: define noundef float @ // CHECK: call float @llvm.sin.f32( -float test_sin_float ( float p0 ) { - return sin ( p0 ); -} +float test_sin_float(float p0) { return sin(p0); } // CHECK: define noundef <2 x float> @ // CHECK: call <2 x float> @llvm.sin.v2f32 -float2 test_sin_float2 ( float2 p0 ) { - return sin ( p0 ); -} +float2 test_sin_float2(float2 p0) { return sin(p0); } // CHECK: define noundef <3 x float> @ // CHECK: call <3 x float> @llvm.sin.v3f32 -float3 test_sin_float3 ( float3 p0 ) { - return sin ( p0 ); -} +float3 test_sin_float3(float3 p0) { return sin(p0); } // CHECK: define noundef <4 x float> @ // CHECK: call <4 x float> @llvm.sin.v4f32 -float4 test_sin_float4 ( float4 p0 ) { - return sin ( p0 ); -} +float4 test_sin_float4(float4 p0) { return sin(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/trunc.hlsl b/clang/test/CodeGenHLSL/builtins/trunc.hlsl index 4ae3cd20257ec0..6078aae5f873fe 100644 --- a/clang/test/CodeGenHLSL/builtins/trunc.hlsl +++ b/clang/test/CodeGenHLSL/builtins/trunc.hlsl @@ -1,56 +1,47 @@ // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ -// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s --check-prefix=NO_HALF +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF -// CHECK: define noundef half @ -// CHECK: call half @llvm.trunc.f16( -// NO_HALF: define noundef float 
@"?test_trunc_half@@YA$halff@$halff@@Z"( +// NATIVE_HALF: define noundef half @"?test_trunc_half +// NATIVE_HALF: call half @llvm.trunc.f16( +// NO_HALF: define noundef float @"?test_trunc_half // NO_HALF: call float @llvm.trunc.f32( -half test_trunc_half ( half p0 ) { - return trunc ( p0 ); -} -// CHECK: define noundef <2 x half> @ -// CHECK: call <2 x half> @llvm.trunc.v2f16 -// NO_HALF: define noundef <2 x float> @"?test_trunc_float2@@YAT?$__vector@M$01@__clang@@T12@@Z"( +half test_trunc_half(half p0) { return trunc(p0); } + +// NATIVE_HALF: define noundef <2 x half> @"?test_trunc_half2 +// NATIVE_HALF: call <2 x half> @llvm.trunc.v2f16 +// NO_HALF: define noundef <2 x float> @"?test_trunc_half2 // NO_HALF: call <2 x float> @llvm.trunc.v2f32( -half2 test_trunc_half2 ( half2 p0 ) { - return trunc ( p0 ); -} -// CHECK: define noundef <3 x half> @ -// CHECK: call <3 x half> @llvm.trunc.v3f16 -// NO_HALF: define noundef <3 x float> @"?test_trunc_float3@@YAT?$__vector@M$02@__clang@@T12@@Z"( +half2 test_trunc_half2(half2 p0) { return trunc(p0); } + +// NATIVE_HALF: define noundef <3 x half> @"?test_trunc_half3 +// NATIVE_HALF: call <3 x half> @llvm.trunc.v3f16 +// NO_HALF: define noundef <3 x float> @"?test_trunc_half3 // NO_HALF: call <3 x float> @llvm.trunc.v3f32( -half3 test_trunc_half3 ( half3 p0 ) { - return trunc ( p0 ); -} -// CHECK: define noundef <4 x half> @ -// CHECK: call <4 x half> @llvm.trunc.v4f16 -// NO_HALF: define noundef <4 x float> @"?test_trunc_float4@@YAT?$__vector@M$03@__clang@@T12@@Z"( +half3 test_trunc_half3(half3 p0) { return trunc(p0); } + +// NATIVE_HALF: define noundef <4 x half> @"?test_trunc_half4 +// NATIVE_HALF: call <4 x half> @llvm.trunc.v4f16 +// NO_HALF: define noundef <4 x float> @"?test_trunc_half4 // NO_HALF: call <4 x float> @llvm.trunc.v4f32( -half4 test_trunc_half4 ( half4 p0 ) { - return trunc ( p0 ); -} +half4 test_trunc_half4(half4 p0) { return trunc(p0); } -// CHECK: define noundef float @ +// CHECK: define noundef float 
@"?test_trunc_float // CHECK: call float @llvm.trunc.f32( -float test_trunc_float ( float p0 ) { - return trunc ( p0 ); -} -// CHECK: define noundef <2 x float> @ +float test_trunc_float(float p0) { return trunc(p0); } + +// CHECK: define noundef <2 x float> @"?test_trunc_float2 // CHECK: call <2 x float> @llvm.trunc.v2f32 -float2 test_trunc_float2 ( float2 p0 ) { - return trunc ( p0 ); -} -// CHECK: define noundef <3 x float> @ +float2 test_trunc_float2(float2 p0) { return trunc(p0); } + +// CHECK: define noundef <3 x float> @"?test_trunc_float3 // CHECK: call <3 x float> @llvm.trunc.v3f32 -float3 test_trunc_float3 ( float3 p0 ) { - return trunc ( p0 ); -} -// CHECK: define noundef <4 x float> @ +float3 test_trunc_float3(float3 p0) { return trunc(p0); } + +// CHECK: define noundef <4 x float> @"?test_trunc_float4 // CHECK: call <4 x float> @llvm.trunc.v4f32 -float4 test_trunc_float4 ( float4 p0 ) { - return trunc ( p0 ); -} +float4 test_trunc_float4(float4 p0) { return trunc(p0); } diff --git a/clang/test/Driver/android-link.cpp b/clang/test/Driver/android-link.cpp index fa9cbc5d0c7a55..f9bdd00507d7bc 100644 --- a/clang/test/Driver/android-link.cpp +++ b/clang/test/Driver/android-link.cpp @@ -17,9 +17,10 @@ // // RUN: %clang -target aarch64-none-linux-android \ // RUN: -### -v %s 2> %t -// RUN: FileCheck -check-prefix=MAX-PAGE-SIZE < %t %s +// RUN: FileCheck -check-prefix=MAX-PAGE-SIZE-AARCH64 < %t %s // // GENERIC-ARM: --fix-cortex-a53-843419 // CORTEX-A53: --fix-cortex-a53-843419 // CORTEX-A57-NOT: --fix-cortex-a53-843419 // MAX-PAGE-SIZE: "-z" "max-page-size=4096" +// MAX-PAGE-SIZE-AARCH64: "-z" "max-page-size=16384" diff --git a/clang/test/OpenMP/interop_codegen.cpp b/clang/test/OpenMP/interop_codegen.cpp index ea83ef8ed4909f..31df2f1ba58c5f 100644 --- a/clang/test/OpenMP/interop_codegen.cpp +++ b/clang/test/OpenMP/interop_codegen.cpp @@ -15,21 +15,31 @@ typedef long omp_intptr_t; extern omp_intptr_t omp_get_interop_int(const omp_interop_t, int, int *); int 
main() { - omp_interop_t obj = omp_interop_none; + omp_interop_t obj1 = omp_interop_none; + omp_interop_t obj2 = omp_interop_none; omp_interop_t i1 = omp_interop_none; omp_interop_t i2 = omp_interop_none; omp_interop_t i3 = omp_interop_none; omp_interop_t i4 = omp_interop_none; omp_interop_t i5 = omp_interop_none; - #pragma omp interop init(targetsync: i1) init(targetsync: obj) - int id = (int )omp_get_interop_int(obj, omp_ipr_fr_id, NULL); - int id1 = (int )omp_get_interop_int(i1, omp_ipr_fr_id, NULL); + #pragma omp interop init(targetsync: obj1) init(targetsync: obj2) + int id = (int )omp_get_interop_int(obj1, omp_ipr_fr_id, NULL); + int id1 = (int )omp_get_interop_int(obj2, omp_ipr_fr_id, NULL); + + #pragma omp interop init(target,targetsync: i1) use(i2) use(i3) destroy(i4) destroy(i5) + int id2 = (int )omp_get_interop_int(i1, omp_ipr_fr_id, NULL); + int id3 = (int )omp_get_interop_int(i2, omp_ipr_fr_id, NULL); } #endif -// CHECK-LABEL: define {{.+}}main{{.+}} +// CHECK-LABEL: define {{.+}}main{{.+}} +// CHECK: call {{.+}}__tgt_interop_init({{.+}}obj1{{.*}}) +// CHECK: call {{.+}}__tgt_interop_init({{.+}}obj2{{.*}}) // CHECK: call {{.+}}__tgt_interop_init({{.+}}i1{{.*}}) -// CHECK: call {{.+}}__tgt_interop_init({{.+}}obj{{.*}}) +// CHECK: call {{.+}}__tgt_interop_destroy({{.+}}i4{{.*}}) +// CHECK: call {{.+}}__tgt_interop_destroy({{.+}}i5{{.*}}) +// CHECK: call {{.+}}__tgt_interop_use({{.+}}i2{{.*}}) +// CHECK: call {{.+}}__tgt_interop_use({{.+}}i3{{.*}}) diff --git a/clang/test/SemaCXX/restrict-this.cpp b/clang/test/SemaCXX/restrict-this.cpp new file mode 100644 index 00000000000000..e78c8e0d56e2f8 --- /dev/null +++ b/clang/test/SemaCXX/restrict-this.cpp @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -verify -fsyntax-only %s +// expected-no-diagnostics + +struct C { + void f() __restrict { + static_assert(__is_same(decltype(this), C *__restrict)); + (void) [this]() { + static_assert(__is_same(decltype(this), C *__restrict)); + (void) [this]() { 
static_assert(__is_same(decltype(this), C *__restrict)); }; + + // By-value capture means 'this' is now a different object; do not + // make it __restrict. + (void) [*this]() { static_assert(__is_same(decltype(this), const C *)); }; + (void) [*this]() mutable { static_assert(__is_same(decltype(this), C *)); }; + }; + } +}; + +template struct TC { + void f() __restrict { + static_assert(__is_same(decltype(this), TC *__restrict)); + (void) [this]() { + static_assert(__is_same(decltype(this), TC *__restrict)); + (void) [this]() { static_assert(__is_same(decltype(this), TC *__restrict)); }; + + // By-value capture means 'this' is now a different object; do not + // make it __restrict. + (void) [*this]() { static_assert(__is_same(decltype(this), const TC *)); }; + (void) [*this]() mutable { static_assert(__is_same(decltype(this), TC *)); }; + }; + } +}; + +void f() { + TC{}.f(); +} + +namespace gh18121 { +struct Foo { + void member() __restrict { + Foo *__restrict This = this; + } +}; +} + +namespace gh42411 { +struct foo { + int v; + void f() const __restrict { + static_assert(__is_same(decltype((v)), const int&)); + (void) [this]() { static_assert(__is_same(decltype((v)), const int&)); }; + } +}; +} + +namespace gh82941 { +void f(int& x) { + (void)x; +} + +class C { + int x; + void g() __restrict; +}; + +void C::g() __restrict { + f(this->x); +} +} diff --git a/clang/test/SemaCXX/warn-bool-conversion.cpp b/clang/test/SemaCXX/warn-bool-conversion.cpp index c81d52d864f2d2..9e8cf0e4f8944a 100644 --- a/clang/test/SemaCXX/warn-bool-conversion.cpp +++ b/clang/test/SemaCXX/warn-bool-conversion.cpp @@ -81,6 +81,18 @@ struct S2 { bool f5(); bool f6(int); +#if __cplusplus >= 201103L +auto f7 = []{}; +auto f8 = [](){}; + +void foo() { + bool b; + b = f7; // expected-warning {{address of lambda function pointer conversion operator will always evaluate to 'true'}} + b = f8; // expected-warning {{address of lambda function pointer conversion operator will always evaluate to 
'true'}} + bool is_true = [](){ return true; }; + // expected-warning@-1{{address of lambda function pointer conversion operator will always evaluate to 'true'}} +} +#endif void bar() { bool b; diff --git a/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl index 54d093aa7ce3a4..8de8f86d7eb260 100644 --- a/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl @@ -1,109 +1,110 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected -float test_no_second_arg ( float2 p0) { - return __builtin_hlsl_dot ( p0 ); +float test_no_second_arg(float2 p0) { + return __builtin_hlsl_dot(p0); // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} } -float test_too_many_arg ( float2 p0) { - return __builtin_hlsl_dot ( p0, p0, p0 ); +float test_too_many_arg(float2 p0) { + return __builtin_hlsl_dot(p0, p0, p0); // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} } -float test_dot_no_second_arg ( float2 p0) { - return dot ( p0 ); +float test_dot_no_second_arg(float2 p0) { + return dot(p0); // expected-error@-1 {{no matching function for call to 'dot'}} } -float test_dot_vector_size_mismatch ( float3 p0, float2 p1 ) { - return dot ( p0, p1 ); +float test_dot_vector_size_mismatch(float3 p0, float2 p1) { + return dot(p0, p1); // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} } -float test_dot_builtin_vector_size_mismatch ( float3 p0, float2 p1 ) { - return __builtin_hlsl_dot ( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +float test_dot_builtin_vector_size_mismatch(float3 p0, float2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to 
'__builtin_hlsl_dot' must have vectors of the same type}} } -float test_dot_scalar_mismatch ( float p0, int p1 ) { - return dot ( p0, p1 ); +float test_dot_scalar_mismatch(float p0, int p1) { + return dot(p0, p1); // expected-error@-1 {{call to 'dot' is ambiguous}} } -float test_dot_element_type_mismatch ( int2 p0, float2 p1 ) { - return dot ( p0, p1 ); +float test_dot_element_type_mismatch(int2 p0, float2 p1) { + return dot(p0, p1); // expected-error@-1 {{call to 'dot' is ambiguous}} } //NOTE: for all the *_promotion we are intentionally not handling type promotion in builtins -float test_builtin_dot_vec_int_to_float_promotion ( int2 p0, float2 p1 ) { - return __builtin_hlsl_dot ( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +float test_builtin_dot_vec_int_to_float_promotion(int2 p0, float2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } -int64_t test_builtin_dot_vec_int_to_int64_promotion( int64_t2 p0, int2 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +int64_t test_builtin_dot_vec_int_to_int64_promotion(int64_t2 p0, int2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } -float test_builtin_dot_vec_half_to_float_promotion( float2 p0, half2 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +float test_builtin_dot_vec_half_to_float_promotion(float2 p0, half2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } #ifdef __HLSL_ENABLE_16_BIT -float test_builtin_dot_vec_int16_to_float_promotion( float2 p0, int16_t2 p1 ) { - return __builtin_hlsl_dot( 
p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +float test_builtin_dot_vec_int16_to_float_promotion(float2 p0, int16_t2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } -half test_builtin_dot_vec_int16_to_half_promotion( half2 p0, int16_t2 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +half test_builtin_dot_vec_int16_to_half_promotion(half2 p0, int16_t2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } -int test_builtin_dot_vec_int16_to_int_promotion( int2 p0, int16_t2 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +int test_builtin_dot_vec_int16_to_int_promotion(int2 p0, int16_t2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } -int64_t test_builtin_dot_vec_int16_to_int64_promotion( int64_t2 p0, int16_t2 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must have the same type}} +int64_t test_builtin_dot_vec_int16_to_int64_promotion(int64_t2 p0, + int16_t2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must have vectors of the same type}} } #endif -float test_builtin_dot_float2_splat ( float p0, float2 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must be vectors}} +float test_builtin_dot_float2_splat(float p0, float2 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must be vectors}} } -float 
test_builtin_dot_float3_splat ( float p0, float3 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must be vectors}} +float test_builtin_dot_float3_splat(float p0, float3 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must be vectors}} } -float test_builtin_dot_float4_splat ( float p0, float4 p1 ) { - return __builtin_hlsl_dot( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must be vectors}} +float test_builtin_dot_float4_splat(float p0, float4 p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must be vectors}} } -float test_dot_float2_int_splat ( float2 p0, int p1 ) { - return __builtin_hlsl_dot ( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must be vectors}} +float test_dot_float2_int_splat(float2 p0, int p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must be vectors}} } -float test_dot_float3_int_splat ( float3 p0, int p1 ) { - return __builtin_hlsl_dot ( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must be vectors}} +float test_dot_float3_int_splat(float3 p0, int p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must be vectors}} } -float test_builtin_dot_int_vect_to_float_vec_promotion ( int2 p0, float p1 ) { - return __builtin_hlsl_dot ( p0, p1 ); - // expected-error@-1 {{first two arguments to '__builtin_hlsl_dot' must be vectors}} +float test_builtin_dot_int_vect_to_float_vec_promotion(int2 p0, float p1) { + return __builtin_hlsl_dot(p0, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_dot' must be vectors}} } -int test_builtin_dot_bool_type_promotion ( bool p0, bool p1 ) { - return __builtin_hlsl_dot ( p0, p1 ); +int 
test_builtin_dot_bool_type_promotion(bool p0, bool p1) { + return __builtin_hlsl_dot(p0, p1); // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}} } diff --git a/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl new file mode 100644 index 00000000000000..06dbdf0a68dfc1 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/frac-errors.hlsl @@ -0,0 +1,27 @@ + +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected + +float test_too_few_arg() { + return __builtin_hlsl_elementwise_frac(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float2 test_too_many_arg(float2 p0) { + return __builtin_hlsl_elementwise_frac(p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float builtin_bool_to_float_type_promotion(bool p1) { + return __builtin_hlsl_elementwise_frac(p1); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}} +} + +float builtin_frac_int_to_float_promotion(int p1) { + return __builtin_hlsl_elementwise_frac(p1); + // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}} +} + +float2 builtin_frac_int2_to_float2_promotion(int2 p1) { + return __builtin_hlsl_elementwise_frac(p1); + // expected-error@-1 {{passing 'int2' (aka 'vector') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl new file mode 100644 index 00000000000000..4ec5a4cdd26a30 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl @@ -0,0 +1,96 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type 
-emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected + +float2 test_no_second_arg(float2 p0) { + return __builtin_hlsl_lerp(p0); + // expected-error@-1 {{too few arguments to function call, expected 3, have 1}} +} + +float2 test_no_third_arg(float2 p0) { + return __builtin_hlsl_lerp(p0, p0); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} +} + +float2 test_too_many_arg(float2 p0) { + return __builtin_hlsl_lerp(p0, p0, p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} +} + +float2 test_lerp_no_second_arg(float2 p0) { + return lerp(p0); + // expected-error@-1 {{no matching function for call to 'lerp'}} +} + +float2 test_lerp_vector_size_mismatch(float3 p0, float2 p1) { + return lerp(p0, p0, p1); + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} +} + +float2 test_lerp_builtin_vector_size_mismatch(float3 p0, float2 p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must have vectors of the same type}} +} + +float test_lerp_scalar_mismatch(float p0, half p1) { + return lerp(p1, p0, p1); + // expected-error@-1 {{call to 'lerp' is ambiguous}} +} + +float2 test_lerp_element_type_mismatch(half2 p0, float2 p1) { + return lerp(p1, p0, p1); + // expected-error@-1 {{call to 'lerp' is ambiguous}} +} + +float2 test_builtin_lerp_float2_splat(float p0, float2 p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + +float3 test_builtin_lerp_float3_splat(float p0, float3 p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + +float4 test_builtin_lerp_float4_splat(float p0, float4 p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments 
to '__builtin_hlsl_lerp' must be vectors}} +} + +float2 test_lerp_float2_int_splat(float2 p0, int p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + +float3 test_lerp_float3_int_splat(float3 p0, int p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + +float2 test_builtin_lerp_int_vect_to_float_vec_promotion(int2 p0, float p1) { + return __builtin_hlsl_lerp(p0, p1, p1); + // expected-error@-1 {{all arguments to '__builtin_hlsl_lerp' must be vectors}} +} + +float test_builtin_lerp_bool_type_promotion(bool p0) { + return __builtin_hlsl_lerp(p0, p0, p0); + // expected-error@-1 {{1st argument must be a floating point type (was 'bool')}} +} + +float builtin_bool_to_float_type_promotion(float p0, bool p1) { + return __builtin_hlsl_lerp(p0, p0, p1); + // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}} +} + +float builtin_bool_to_float_type_promotion2(bool p0, float p1) { + return __builtin_hlsl_lerp(p1, p0, p1); + // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}} +} + +float builtin_lerp_int_to_float_promotion(float p0, int p1) { + return __builtin_hlsl_lerp(p0, p0, p1); + // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}} +} + +float4 test_lerp_int4(int4 p0, int4 p1, int4 p2) { + return __builtin_hlsl_lerp(p0, p1, p2); + // expected-error@-1 {{1st argument must be a floating point type (was 'int4' (aka 'vector'))}} +} \ No newline at end of file diff --git a/clang/test/SemaHLSL/OverloadResolutionBugs.hlsl b/clang/test/SemaHLSL/OverloadResolutionBugs.hlsl index 8464f1c1a7c2cd..c13cb299127aac 100644 --- a/clang/test/SemaHLSL/OverloadResolutionBugs.hlsl +++ b/clang/test/SemaHLSL/OverloadResolutionBugs.hlsl @@ -7,73 +7,67 @@ void Fn3(double2 D); void Fn3(float2 F); -void Call3(half2 H) { - Fn3(H); -} +void Call3(half2 H) { 
Fn3(H); } void Fn5(double2 D); -void Call5(half2 H) { - Fn5(H); -} +void Call5(half2 H) { Fn5(H); } void Fn4(int64_t2 L); void Fn4(int2 I); -void Call4(int16_t H) { - Fn4(H); -} +void Call4(int16_t H) { Fn4(H); } -int test_builtin_dot_bool_type_promotion ( bool p0, bool p1 ) { - return dot ( p0, p1 ); +int test_builtin_dot_bool_type_promotion(bool p0, bool p1) { + return dot(p0, p1); } -float test_dot_scalar_mismatch ( float p0, int p1 ) { - return dot ( p0, p1 ); -} +float test_dot_scalar_mismatch(float p0, int p1) { return dot(p0, p1); } -float test_dot_element_type_mismatch ( int2 p0, float2 p1 ) { - return dot ( p0, p1 ); -} +float test_dot_element_type_mismatch(int2 p0, float2 p1) { return dot(p0, p1); } -float test_builtin_dot_vec_int_to_float_promotion ( int2 p0, float2 p1 ) { - return dot ( p0, p1 ); +float test_builtin_dot_vec_int_to_float_promotion(int2 p0, float2 p1) { + return dot(p0, p1); } -int64_t test_builtin_dot_vec_int_to_int64_promotion( int64_t2 p0, int2 p1 ) { - return dot ( p0, p1 ); +int64_t test_builtin_dot_vec_int_to_int64_promotion(int64_t2 p0, int2 p1) { + return dot(p0, p1); } -float test_builtin_dot_vec_half_to_float_promotion( float2 p0, half2 p1 ) { - return dot( p0, p1 ); +float test_builtin_dot_vec_half_to_float_promotion(float2 p0, half2 p1) { + return dot(p0, p1); } -float test_builtin_dot_vec_int16_to_float_promotion( float2 p0, int16_t2 p1 ) { - return dot( p0, p1 ); +float test_builtin_dot_vec_int16_to_float_promotion(float2 p0, int16_t2 p1) { + return dot(p0, p1); } -half test_builtin_dot_vec_int16_to_half_promotion( half2 p0, int16_t2 p1 ) { - return dot( p0, p1 ); +half test_builtin_dot_vec_int16_to_half_promotion(half2 p0, int16_t2 p1) { + return dot(p0, p1); } -int test_builtin_dot_vec_int16_to_int_promotion( int2 p0, int16_t2 p1 ) { - return dot( p0, p1 ); +int test_builtin_dot_vec_int16_to_int_promotion(int2 p0, int16_t2 p1) { + return dot(p0, p1); } -int64_t test_builtin_dot_vec_int16_to_int64_promotion( int64_t2 p0, 
int16_t2 p1 ) { - return dot( p0, p1 ); +int64_t test_builtin_dot_vec_int16_to_int64_promotion(int64_t2 p0, + int16_t2 p1) { + return dot(p0, p1); } +float4 test_frac_int4(int4 p0) { return frac(p0); } + +float test_frac_int(int p0) { return frac(p0); } + +float test_frac_bool(bool p0) { return frac(p0); } + // https://github.com/llvm/llvm-project/issues/81049 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ // RUN: dxil-pc-shadermodel6.2-library %s -emit-llvm -disable-llvm-passes \ // RUN: -o - | FileCheck %s --check-prefix=NO_HALF -half sqrt_h(half x) -{ - return sqrt(x); -} +half sqrt_h(half x) { return sqrt(x); } // NO_HALF: define noundef float @"?sqrt_h@@YA$halff@$halff@@Z"( // NO_HALF: call float @llvm.sqrt.f32(float %0) diff --git a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl index 81fedc2de31570..2ea7d14e80eebf 100644 --- a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl +++ b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl @@ -40,7 +40,7 @@ void Fn3( int64_t2 p0); // CHECK-NEXT: ImplicitCastExpr {{.*}} 'half2':'half __attribute__((ext_vector_type(2)))' // CHECK-NEXT: DeclRefExpr {{.*}} 'half2':'half __attribute__((ext_vector_type(2)))' lvalue ParmVar {{.*}} 'p0' 'half2':'half __attribute__((ext_vector_type(2)))' // CHECKIR-LABEL: Call3 -// CHECKIR: %conv = fptosi <2 x half> {{.*}} to <2 x i64> +// CHECKIR: {{.*}} = fptosi <2 x half> {{.*}} to <2 x i64> void Call3(half2 p0) { Fn3(p0); } diff --git a/clang/test/SemaOpenACC/no-branch-in-out.c b/clang/test/SemaOpenACC/no-branch-in-out.c index f8fb40a1ca8f72..d070247fa65b86 100644 --- a/clang/test/SemaOpenACC/no-branch-in-out.c +++ b/clang/test/SemaOpenACC/no-branch-in-out.c @@ -113,3 +113,200 @@ void Return() { } } } + +void Goto() { + int j; +#pragma acc parallel // expected-note{{invalid branch out of OpenACC Compute Construct}} + while(j) { + if (j <3) + goto LABEL; // expected-error{{cannot jump from this goto 
statement to its label}} + } + +LABEL: + {} + + goto LABEL_IN; // expected-error{{cannot jump from this goto statement to its label}} + +#pragma acc parallel // expected-note{{invalid branch into OpenACC Compute Construct}} + for(int i = 0; i < 5; ++i) { +LABEL_IN: + {} + } + +#pragma acc parallel + for(int i = 0; i < 5; ++i) { +LABEL_NOT_CALLED: + {} + } + +#pragma acc parallel + { + goto ANOTHER_LOOP; // expected-error{{cannot jump from this goto statement to its label}} + + } +#pragma acc parallel// expected-note{{invalid branch into OpenACC Compute Construct}} + + { +ANOTHER_LOOP: + {} + } + +#pragma acc parallel + { + while (j) { + --j; + if (j < 3) + goto LABEL2; + + if (j > 4) + break; + } +LABEL2: + {} + } + +#pragma acc parallel + do { + if (j < 3) + goto LABEL3; + + if (j > 4) + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + +LABEL3: + {} + } while (j); + +LABEL4: + {} +#pragma acc parallel// expected-note{{invalid branch out of OpenACC Compute Construct}} + { + goto LABEL4;// expected-error{{cannot jump from this goto statement to its label}} + } + +#pragma acc parallel// expected-note{{invalid branch into OpenACC Compute Construct}} + + { +LABEL5: + {} + } + + { + goto LABEL5;// expected-error{{cannot jump from this goto statement to its label}} + } + +#pragma acc parallel + { +LABEL6: + {} + goto LABEL6; + + } + +#pragma acc parallel + goto LABEL7; // expected-error{{cannot jump from this goto statement to its label}} +#pragma acc parallel// expected-note{{invalid branch into OpenACC Compute Construct}} + { +LABEL7:{} + } + +#pragma acc parallel + LABEL8:{} +#pragma acc parallel// expected-note{{invalid branch out of OpenACC Compute Construct}} + { + goto LABEL8;// expected-error{{cannot jump from this goto statement to its label}} + } + + +#pragma acc parallel// expected-note{{invalid branch into OpenACC Compute Construct}} + { +LABEL9:{} + } + + ({goto LABEL9;});// expected-error{{cannot jump from this goto statement to 
its label}} + +#pragma acc parallel// expected-note{{invalid branch out of OpenACC Compute Construct}} + { + ({goto LABEL10;});// expected-error{{cannot jump from this goto statement to its label}} + } + +LABEL10:{} + + ({goto LABEL11;});// expected-error{{cannot jump from this goto statement to its label}} +#pragma acc parallel// expected-note{{invalid branch into OpenACC Compute Construct}} + { +LABEL11:{} + } + +LABEL12:{} +#pragma acc parallel// expected-note{{invalid branch out of OpenACC Compute Construct}} + { + ({goto LABEL12;});// expected-error{{cannot jump from this goto statement to its label}} + } + +#pragma acc parallel + { + ({goto LABEL13;}); +LABEL13:{} + } + +#pragma acc parallel + { + LABEL14:{} + ({goto LABEL14;}); + } +} + +void IndirectGoto1() { + void* ptr; +#pragma acc parallel + { +LABEL1:{} + ptr = &&LABEL1; + + goto *ptr; + + } +} + +void IndirectGoto2() { + void* ptr; +LABEL2:{} // #GOTOLBL2 + ptr = &&LABEL2; +#pragma acc parallel // #GOTOPAR2 + { +// expected-error@+3{{cannot jump from this indirect goto statement to one of its possible targets}} +// expected-note@#GOTOLBL2{{possible target of indirect goto statement}} +// expected-note@#GOTOPAR2{{invalid branch out of OpenACC Compute Construct}} + goto *ptr; + } +} + +void IndirectGoto3() { + void* ptr; +#pragma acc parallel // #GOTOPAR3 + { +LABEL3:{} // #GOTOLBL3 + ptr = &&LABEL3; + } +// expected-error@+3{{cannot jump from this indirect goto statement to one of its possible targets}} +// expected-note@#GOTOLBL3{{possible target of indirect goto statement}} +// expected-note@#GOTOPAR3{{invalid branch into OpenACC Compute Construct}} + goto *ptr; +} + +void IndirectGoto4() { + void* ptr; +#pragma acc parallel // #GOTOPAR4 + { +LABEL4:{} + ptr = &&LABEL4; +// expected-error@+3{{cannot jump from this indirect goto statement to one of its possible targets}} +// expected-note@#GOTOLBL5{{possible target of indirect goto statement}} +// expected-note@#GOTOPAR4{{invalid branch out of OpenACC 
Compute Construct}} + goto *ptr; + } +LABEL5:// #GOTOLBL5 + + ptr=&&LABEL5; +} diff --git a/clang/test/SemaOpenACC/no-branch-in-out.cpp b/clang/test/SemaOpenACC/no-branch-in-out.cpp index 232e372cedd357..9affdf733ace8d 100644 --- a/clang/test/SemaOpenACC/no-branch-in-out.cpp +++ b/clang/test/SemaOpenACC/no-branch-in-out.cpp @@ -15,3 +15,100 @@ void ReturnTest() { } } } + +template +void BreakContinue() { + +#pragma acc parallel + for(int i =0; i < 5; ++i) { + switch(i) { + case 0: + break; // leaves switch, not 'for'. + default: + i +=2; + break; + } + if (i == 2) + continue; + + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } + + int j; + switch(j) { + case 0: +#pragma acc parallel + { + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } + case 1: +#pragma acc parallel + { + } + break; + } + +#pragma acc parallel + for(int i = 0; i < 5; ++i) { + if (i > 1) + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } + +#pragma acc parallel + switch(j) { + case 1: + break; + } + +#pragma acc parallel + { + for(int i = 1; i < 100; i++) { + if (i > 4) + break; + } + } + + for (int i =0; i < 5; ++i) { +#pragma acc parallel + { + continue; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } + } + +#pragma acc parallel + for (int i =0; i < 5; ++i) { + continue; + } + +#pragma acc parallel + for (int i =0; i < 5; ++i) { + { + continue; + } + } + + for (int i =0; i < 5; ++i) { +#pragma acc parallel + { + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } + } + +#pragma acc parallel + while (j) { + --j; + if (j > 4) + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } + +#pragma acc parallel + do { + --j; + if (j > 4) + break; // expected-error{{invalid branch out of OpenACC Compute Construct}} + } while (j ); +} + +void Instantiate() { + BreakContinue(); +} diff --git a/flang/runtime/Float128Math/CMakeLists.txt 
b/flang/runtime/Float128Math/CMakeLists.txt index f11678cd70b769..60d44c78be0faf 100644 --- a/flang/runtime/Float128Math/CMakeLists.txt +++ b/flang/runtime/Float128Math/CMakeLists.txt @@ -59,7 +59,9 @@ set(sources erf.cpp erfc.cpp exp.cpp + exponent.cpp floor.cpp + fraction.cpp hypot.cpp j0.cpp j1.cpp @@ -69,11 +71,18 @@ set(sources log.cpp log10.cpp lround.cpp + mod-real.cpp + modulo-real.cpp + nearest.cpp norm2.cpp pow.cpp round.cpp + rrspacing.cpp + scale.cpp + set-exponent.cpp sin.cpp sinh.cpp + spacing.cpp sqrt.cpp tan.cpp tanh.cpp diff --git a/flang/runtime/Float128Math/acos.cpp b/flang/runtime/Float128Math/acos.cpp index 531c79c7444bd3..14ff6944856844 100644 --- a/flang/runtime/Float128Math/acos.cpp +++ b/flang/runtime/Float128Math/acos.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(AcosF128)( CppTypeFor x) { - return Acos::invoke(x); + return Acos::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/acosh.cpp b/flang/runtime/Float128Math/acosh.cpp index 1495120edd1a07..9d70804e44a470 100644 --- a/flang/runtime/Float128Math/acosh.cpp +++ b/flang/runtime/Float128Math/acosh.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(AcoshF128)( CppTypeFor x) { - return Acosh::invoke(x); + return Acosh::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/asin.cpp b/flang/runtime/Float128Math/asin.cpp index 2fb8c6c5e97d71..6781b23f0363db 100644 --- a/flang/runtime/Float128Math/asin.cpp +++ b/flang/runtime/Float128Math/asin.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(AsinF128)( CppTypeFor x) { - return Asin::invoke(x); + return Asin::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/asinh.cpp b/flang/runtime/Float128Math/asinh.cpp index 3630a77be42b2c..1310bc61c1de0f 100644 --- a/flang/runtime/Float128Math/asinh.cpp +++ b/flang/runtime/Float128Math/asinh.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 
113 || HAS_FLOAT128 CppTypeFor RTDEF(AsinhF128)( CppTypeFor x) { - return Asinh::invoke(x); + return Asinh::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/atan.cpp b/flang/runtime/Float128Math/atan.cpp index 4609343e9d1273..f01382df90c0ee 100644 --- a/flang/runtime/Float128Math/atan.cpp +++ b/flang/runtime/Float128Math/atan.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(AtanF128)( CppTypeFor x) { - return Atan::invoke(x); + return Atan::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/atan2.cpp b/flang/runtime/Float128Math/atan2.cpp index c0175e67ec71bd..dd646b0452b115 100644 --- a/flang/runtime/Float128Math/atan2.cpp +++ b/flang/runtime/Float128Math/atan2.cpp @@ -15,7 +15,7 @@ extern "C" { CppTypeFor RTDEF(Atan2F128)( CppTypeFor x, CppTypeFor y) { - return Atan2::invoke(x, y); + return Atan2::invoke(x, y); } #endif diff --git a/flang/runtime/Float128Math/atanh.cpp b/flang/runtime/Float128Math/atanh.cpp index bfacb967117d70..5fc5ba5debc81a 100644 --- a/flang/runtime/Float128Math/atanh.cpp +++ b/flang/runtime/Float128Math/atanh.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(AtanhF128)( CppTypeFor x) { - return Atanh::invoke(x); + return Atanh::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/cabs.cpp b/flang/runtime/Float128Math/cabs.cpp index 827b197a6a81ae..3b8c9d17003c6e 100644 --- a/flang/runtime/Float128Math/cabs.cpp +++ b/flang/runtime/Float128Math/cabs.cpp @@ -16,7 +16,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 // NOTE: Flang calls the runtime APIs using C _Complex ABI CppTypeFor RTDEF(CAbsF128)(CFloat128ComplexType x) { - return CAbs::invoke(x); + return CAbs::invoke(x); } #endif #endif diff --git a/flang/runtime/Float128Math/ceil.cpp b/flang/runtime/Float128Math/ceil.cpp index a53a2c27c616b5..ed4d164a62bedc 100644 --- a/flang/runtime/Float128Math/ceil.cpp +++ b/flang/runtime/Float128Math/ceil.cpp @@ -14,7 +14,7 @@ 
extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(CeilF128)( CppTypeFor x) { - return Ceil::invoke(x); + return Ceil::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/cos.cpp b/flang/runtime/Float128Math/cos.cpp index 845c970bd8e639..b93c92f275f791 100644 --- a/flang/runtime/Float128Math/cos.cpp +++ b/flang/runtime/Float128Math/cos.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(CosF128)( CppTypeFor x) { - return Cos::invoke(x); + return Cos::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/cosh.cpp b/flang/runtime/Float128Math/cosh.cpp index acf6ff4130ee3c..a3662a826dcb1c 100644 --- a/flang/runtime/Float128Math/cosh.cpp +++ b/flang/runtime/Float128Math/cosh.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(CoshF128)( CppTypeFor x) { - return Cosh::invoke(x); + return Cosh::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/erf.cpp b/flang/runtime/Float128Math/erf.cpp index 862f3b97411873..631f71c76effe7 100644 --- a/flang/runtime/Float128Math/erf.cpp +++ b/flang/runtime/Float128Math/erf.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(ErfF128)( CppTypeFor x) { - return Erf::invoke(x); + return Erf::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/erfc.cpp b/flang/runtime/Float128Math/erfc.cpp index 0ac0b945563747..ea3cd646d8c4ba 100644 --- a/flang/runtime/Float128Math/erfc.cpp +++ b/flang/runtime/Float128Math/erfc.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(ErfcF128)( CppTypeFor x) { - return Erfc::invoke(x); + return Erfc::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/exp.cpp b/flang/runtime/Float128Math/exp.cpp index 50386fdbfb6449..b1161b0f29294c 100644 --- a/flang/runtime/Float128Math/exp.cpp +++ b/flang/runtime/Float128Math/exp.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 
CppTypeFor RTDEF(ExpF128)( CppTypeFor x) { - return Exp::invoke(x); + return Exp::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/exponent.cpp b/flang/runtime/Float128Math/exponent.cpp new file mode 100644 index 00000000000000..1be1dd0d0ac8b8 --- /dev/null +++ b/flang/runtime/Float128Math/exponent.cpp @@ -0,0 +1,26 @@ +//===-- runtime/Float128Math/exponent.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// EXPONENT (16.9.75) +CppTypeFor RTDEF(Exponent16_4)(F128Type x) { + return Exponent>(x); +} +CppTypeFor RTDEF(Exponent16_8)(F128Type x) { + return Exponent>(x); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/floor.cpp b/flang/runtime/Float128Math/floor.cpp index 48cf4e01448070..78a94984cac8a3 100644 --- a/flang/runtime/Float128Math/floor.cpp +++ b/flang/runtime/Float128Math/floor.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(FloorF128)( CppTypeFor x) { - return Floor::invoke(x); + return Floor::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/fraction.cpp b/flang/runtime/Float128Math/fraction.cpp new file mode 100644 index 00000000000000..8c9889b7f6871e --- /dev/null +++ b/flang/runtime/Float128Math/fraction.cpp @@ -0,0 +1,21 @@ +//===-- runtime/Float128Math/fraction.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// FRACTION (16.9.80) +F128Type RTDEF(Fraction16)(F128Type x) { return Fraction(x); } +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/hypot.cpp b/flang/runtime/Float128Math/hypot.cpp index 33c83a1654993e..b4fa1d66bcfa6a 100644 --- a/flang/runtime/Float128Math/hypot.cpp +++ b/flang/runtime/Float128Math/hypot.cpp @@ -15,7 +15,7 @@ extern "C" { CppTypeFor RTDEF(HypotF128)( CppTypeFor x, CppTypeFor y) { - return Hypot::invoke(x, y); + return Hypot::invoke(x, y); } #endif diff --git a/flang/runtime/Float128Math/j0.cpp b/flang/runtime/Float128Math/j0.cpp index f8f3fe71d8a616..9390a7eeb3c605 100644 --- a/flang/runtime/Float128Math/j0.cpp +++ b/flang/runtime/Float128Math/j0.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(J0F128)( CppTypeFor x) { - return J0::invoke(x); + return J0::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/j1.cpp b/flang/runtime/Float128Math/j1.cpp index 9a51b973e1cf88..c54927123388c6 100644 --- a/flang/runtime/Float128Math/j1.cpp +++ b/flang/runtime/Float128Math/j1.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(J1F128)( CppTypeFor x) { - return J1::invoke(x); + return J1::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/jn.cpp b/flang/runtime/Float128Math/jn.cpp index 644a66863c0d23..15afd83400c320 100644 --- a/flang/runtime/Float128Math/jn.cpp +++ b/flang/runtime/Float128Math/jn.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(JnF128)( int n, CppTypeFor x) { - return Jn::invoke(n, x); + return Jn::invoke(n, x); } #endif diff --git 
a/flang/runtime/Float128Math/lgamma.cpp b/flang/runtime/Float128Math/lgamma.cpp index fff7dfcb9c15db..ac31c89a912b32 100644 --- a/flang/runtime/Float128Math/lgamma.cpp +++ b/flang/runtime/Float128Math/lgamma.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(LgammaF128)( CppTypeFor x) { - return Lgamma::invoke(x); + return Lgamma::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/llround.cpp b/flang/runtime/Float128Math/llround.cpp index 00c62818af19db..b77281c507fe7c 100644 --- a/flang/runtime/Float128Math/llround.cpp +++ b/flang/runtime/Float128Math/llround.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(LlroundF128)( CppTypeFor x) { - return Llround::invoke(x); + return Llround::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/log.cpp b/flang/runtime/Float128Math/log.cpp index 0cfe329c6f7f59..38e6b581fd849c 100644 --- a/flang/runtime/Float128Math/log.cpp +++ b/flang/runtime/Float128Math/log.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(LogF128)( CppTypeFor x) { - return Log::invoke(x); + return Log::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/log10.cpp b/flang/runtime/Float128Math/log10.cpp index cd8bf27fcb121b..3c89c0e707774f 100644 --- a/flang/runtime/Float128Math/log10.cpp +++ b/flang/runtime/Float128Math/log10.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(Log10F128)( CppTypeFor x) { - return Log10::invoke(x); + return Log10::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/lround.cpp b/flang/runtime/Float128Math/lround.cpp index 6ced66a1b2d3af..ce7a228038a1d3 100644 --- a/flang/runtime/Float128Math/lround.cpp +++ b/flang/runtime/Float128Math/lround.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(LroundF128)( CppTypeFor x) { - return Lround::invoke(x); + return Lround::invoke(x); } #endif 
diff --git a/flang/runtime/Float128Math/math-entries.h b/flang/runtime/Float128Math/math-entries.h index a0d81d0cbb5407..ad3f6aa18aa9a1 100644 --- a/flang/runtime/Float128Math/math-entries.h +++ b/flang/runtime/Float128Math/math-entries.h @@ -13,36 +13,40 @@ #include "flang/Common/float128.h" #include "flang/Runtime/entry-names.h" #include +#include #include +namespace { +using namespace Fortran::runtime; +using F128RetType = CppTypeFor; +using I32RetType = CppTypeFor; +using I64RetType = CppTypeFor; +} // namespace + namespace Fortran::runtime { // Define a class template to gracefully fail, when // there is no specialized template that implements // the required function via using the third-party // implementation. -#define DEFINE_FALLBACK(caller) \ - template struct caller { \ - template \ - [[noreturn]] static std::invoke_result_t invoke( \ - ATs... args) { \ +#define DEFINE_FALLBACK(caller, ret_type) \ + template struct caller { \ + template [[noreturn]] static RT invoke(ATs... args) { \ Terminator terminator{__FILE__, __LINE__}; \ terminator.Crash("Float128 variant of '%s' is unsupported", #caller); \ } \ }; // Define template specialization that is calling the third-party -// implementation. The template is specialized by a function pointer -// that is the FortranFloat128Math entry point. The signatures -// of the caller and the callee must match. +// implementation. // // Defining the specialization for any target library requires // adding the generic template via DEFINE_FALLBACK, so that // a build with another target library that does not define // the same alias can gracefully fail in runtime. #define DEFINE_SIMPLE_ALIAS(caller, callee) \ - template struct caller

{ \ - static RT invoke(ATs... args) { \ + template struct caller { \ + template static RT invoke(ATs... args) { \ static_assert(std::is_invocable_r_v()...))(ATs...), ATs...>); \ if constexpr (std::is_same_v) { \ @@ -54,48 +58,58 @@ namespace Fortran::runtime { }; // Define fallback callers. -DEFINE_FALLBACK(Abs) -DEFINE_FALLBACK(Acos) -DEFINE_FALLBACK(Acosh) -DEFINE_FALLBACK(Asin) -DEFINE_FALLBACK(Asinh) -DEFINE_FALLBACK(Atan) -DEFINE_FALLBACK(Atan2) -DEFINE_FALLBACK(Atanh) -DEFINE_FALLBACK(Ceil) -DEFINE_FALLBACK(Cos) -DEFINE_FALLBACK(Cosh) -DEFINE_FALLBACK(Erf) -DEFINE_FALLBACK(Erfc) -DEFINE_FALLBACK(Exp) -DEFINE_FALLBACK(Floor) -DEFINE_FALLBACK(Hypot) -DEFINE_FALLBACK(J0) -DEFINE_FALLBACK(J1) -DEFINE_FALLBACK(Jn) -DEFINE_FALLBACK(Lgamma) -DEFINE_FALLBACK(Llround) -DEFINE_FALLBACK(Lround) -DEFINE_FALLBACK(Log) -DEFINE_FALLBACK(Log10) -DEFINE_FALLBACK(Pow) -DEFINE_FALLBACK(Round) -DEFINE_FALLBACK(Sin) -DEFINE_FALLBACK(Sinh) -DEFINE_FALLBACK(Sqrt) -DEFINE_FALLBACK(Tan) -DEFINE_FALLBACK(Tanh) -DEFINE_FALLBACK(Tgamma) -DEFINE_FALLBACK(Trunc) -DEFINE_FALLBACK(Y0) -DEFINE_FALLBACK(Y1) -DEFINE_FALLBACK(Yn) +#define DEFINE_FALLBACK_F128(caller) DEFINE_FALLBACK(caller, ::F128RetType) +#define DEFINE_FALLBACK_I32(caller) DEFINE_FALLBACK(caller, ::I32RetType) +#define DEFINE_FALLBACK_I64(caller) DEFINE_FALLBACK(caller, ::I64RetType) + +DEFINE_FALLBACK_F128(Abs) +DEFINE_FALLBACK_F128(Acos) +DEFINE_FALLBACK_F128(Acosh) +DEFINE_FALLBACK_F128(Asin) +DEFINE_FALLBACK_F128(Asinh) +DEFINE_FALLBACK_F128(Atan) +DEFINE_FALLBACK_F128(Atan2) +DEFINE_FALLBACK_F128(Atanh) +DEFINE_FALLBACK_F128(Ceil) +DEFINE_FALLBACK_F128(Cos) +DEFINE_FALLBACK_F128(Cosh) +DEFINE_FALLBACK_F128(Erf) +DEFINE_FALLBACK_F128(Erfc) +DEFINE_FALLBACK_F128(Exp) +DEFINE_FALLBACK_F128(Floor) +DEFINE_FALLBACK_F128(Frexp) +DEFINE_FALLBACK_F128(Hypot) +DEFINE_FALLBACK_I32(Ilogb) +DEFINE_FALLBACK_I32(Isinf) +DEFINE_FALLBACK_I32(Isnan) +DEFINE_FALLBACK_F128(J0) +DEFINE_FALLBACK_F128(J1) +DEFINE_FALLBACK_F128(Jn) 
+DEFINE_FALLBACK_F128(Ldexp) +DEFINE_FALLBACK_F128(Lgamma) +DEFINE_FALLBACK_I64(Llround) +DEFINE_FALLBACK_F128(Log) +DEFINE_FALLBACK_F128(Log10) +DEFINE_FALLBACK_I32(Lround) +DEFINE_FALLBACK_F128(Nextafter) +DEFINE_FALLBACK_F128(Pow) +DEFINE_FALLBACK_F128(Qnan) +DEFINE_FALLBACK_F128(Round) +DEFINE_FALLBACK_F128(Sin) +DEFINE_FALLBACK_F128(Sinh) +DEFINE_FALLBACK_F128(Sqrt) +DEFINE_FALLBACK_F128(Tan) +DEFINE_FALLBACK_F128(Tanh) +DEFINE_FALLBACK_F128(Tgamma) +DEFINE_FALLBACK_F128(Trunc) +DEFINE_FALLBACK_F128(Y0) +DEFINE_FALLBACK_F128(Y1) +DEFINE_FALLBACK_F128(Yn) #if HAS_LIBM -// Define wrapper callers for libm. -#include -#include +#include +// Define wrapper callers for libm. #if LDBL_MANT_DIG == 113 // Use STD math functions. They provide IEEE-754 128-bit float // support either via 'long double' or __float128. @@ -118,15 +132,21 @@ DEFINE_SIMPLE_ALIAS(Erf, std::erf) DEFINE_SIMPLE_ALIAS(Erfc, std::erfc) DEFINE_SIMPLE_ALIAS(Exp, std::exp) DEFINE_SIMPLE_ALIAS(Floor, std::floor) +DEFINE_SIMPLE_ALIAS(Frexp, std::frexp) DEFINE_SIMPLE_ALIAS(Hypot, std::hypot) +DEFINE_SIMPLE_ALIAS(Ilogb, std::ilogb) +DEFINE_SIMPLE_ALIAS(Isinf, std::isinf) +DEFINE_SIMPLE_ALIAS(Isnan, std::isnan) DEFINE_SIMPLE_ALIAS(J0, j0l) DEFINE_SIMPLE_ALIAS(J1, j1l) DEFINE_SIMPLE_ALIAS(Jn, jnl) +DEFINE_SIMPLE_ALIAS(Ldexp, std::ldexp) DEFINE_SIMPLE_ALIAS(Lgamma, std::lgamma) DEFINE_SIMPLE_ALIAS(Llround, std::llround) -DEFINE_SIMPLE_ALIAS(Lround, std::lround) DEFINE_SIMPLE_ALIAS(Log, std::log) DEFINE_SIMPLE_ALIAS(Log10, std::log10) +DEFINE_SIMPLE_ALIAS(Lround, std::lround) +DEFINE_SIMPLE_ALIAS(Nextafter, std::nextafter) DEFINE_SIMPLE_ALIAS(Pow, std::pow) DEFINE_SIMPLE_ALIAS(Round, std::round) DEFINE_SIMPLE_ALIAS(Sin, std::sin) @@ -139,6 +159,12 @@ DEFINE_SIMPLE_ALIAS(Trunc, std::trunc) DEFINE_SIMPLE_ALIAS(Y0, y0l) DEFINE_SIMPLE_ALIAS(Y1, y1l) DEFINE_SIMPLE_ALIAS(Yn, ynl) + +// Use numeric_limits to produce infinity of the right type. 
+#define F128_RT_INFINITY \ + (std::numeric_limits>::infinity()) +#define F128_RT_QNAN \ + (std::numeric_limits>::quiet_NaN()) #else // LDBL_MANT_DIG != 113 #if !HAS_LIBMF128 // glibc >=2.26 seems to have complete support for __float128 @@ -172,15 +198,21 @@ DEFINE_SIMPLE_ALIAS(Erf, erfq) DEFINE_SIMPLE_ALIAS(Erfc, erfcq) DEFINE_SIMPLE_ALIAS(Exp, expq) DEFINE_SIMPLE_ALIAS(Floor, floorq) +DEFINE_SIMPLE_ALIAS(Frexp, frexpq) DEFINE_SIMPLE_ALIAS(Hypot, hypotq) +DEFINE_SIMPLE_ALIAS(Ilogb, ilogbq) +DEFINE_SIMPLE_ALIAS(Isinf, isinfq) +DEFINE_SIMPLE_ALIAS(Isnan, isnanq) DEFINE_SIMPLE_ALIAS(J0, j0q) DEFINE_SIMPLE_ALIAS(J1, j1q) DEFINE_SIMPLE_ALIAS(Jn, jnq) +DEFINE_SIMPLE_ALIAS(Ldexp, ldexpq) DEFINE_SIMPLE_ALIAS(Lgamma, lgammaq) DEFINE_SIMPLE_ALIAS(Llround, llroundq) -DEFINE_SIMPLE_ALIAS(Lround, lroundq) DEFINE_SIMPLE_ALIAS(Log, logq) DEFINE_SIMPLE_ALIAS(Log10, log10q) +DEFINE_SIMPLE_ALIAS(Lround, lroundq) +DEFINE_SIMPLE_ALIAS(Nextafter, nextafterq) DEFINE_SIMPLE_ALIAS(Pow, powq) DEFINE_SIMPLE_ALIAS(Round, roundq) DEFINE_SIMPLE_ALIAS(Sin, sinq) @@ -193,19 +225,11 @@ DEFINE_SIMPLE_ALIAS(Trunc, truncq) DEFINE_SIMPLE_ALIAS(Y0, y0q) DEFINE_SIMPLE_ALIAS(Y1, y1q) DEFINE_SIMPLE_ALIAS(Yn, ynq) -#endif -extern "C" { -// Declarations of the entry points that might be referenced -// within the Float128Math library itself. -// Note that not all of these entry points are actually -// defined in this library. Some of them are used just -// as template parameters to call the corresponding callee directly. -CppTypeFor RTDECL(AbsF128)( - CppTypeFor x); -CppTypeFor RTDECL(SqrtF128)( - CppTypeFor x); -} // extern "C" +// Use cmath INFINITY/NAN definition. Rely on C implicit conversions. 
+#define F128_RT_INFINITY (INFINITY) +#define F128_RT_QNAN (NAN) +#endif } // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/mod-real.cpp b/flang/runtime/Float128Math/mod-real.cpp new file mode 100644 index 00000000000000..42e6ce76e2fa1b --- /dev/null +++ b/flang/runtime/Float128Math/mod-real.cpp @@ -0,0 +1,24 @@ +//===-- runtime/Float128Math/mod-real.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// MOD (16.9.135) +F128Type RTDEF(ModReal16)( + F128Type x, F128Type p, const char *sourceFile, int sourceLine) { + return RealMod(x, p, sourceFile, sourceLine); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/modulo-real.cpp b/flang/runtime/Float128Math/modulo-real.cpp new file mode 100644 index 00000000000000..13000aba8c8323 --- /dev/null +++ b/flang/runtime/Float128Math/modulo-real.cpp @@ -0,0 +1,24 @@ +//===-- runtime/Float128Math/modulo-real.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// MODULO (16.9.136) +F128Type RTDEF(ModuloReal16)( + F128Type x, F128Type p, const char *sourceFile, int sourceLine) { + return RealMod(x, p, sourceFile, sourceLine); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/nearest.cpp b/flang/runtime/Float128Math/nearest.cpp new file mode 100644 index 00000000000000..148ac4ef839160 --- /dev/null +++ b/flang/runtime/Float128Math/nearest.cpp @@ -0,0 +1,23 @@ +//===-- runtime/Float128Math/nearest.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +CppTypeFor RTDEF(Nearest16)( + CppTypeFor x, bool positive) { + return Nextafter::invoke( + x, positive ? 
F128_RT_INFINITY : -F128_RT_INFINITY); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/norm2.cpp b/flang/runtime/Float128Math/norm2.cpp index 17453bd2d6cbd7..15c482f7f007ce 100644 --- a/flang/runtime/Float128Math/norm2.cpp +++ b/flang/runtime/Float128Math/norm2.cpp @@ -7,39 +7,17 @@ //===----------------------------------------------------------------------===// #include "math-entries.h" +#include "numeric-template-specs.h" #include "reduction-templates.h" -#include - -#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 - -namespace { -using namespace Fortran::runtime; - -using AccumType = Norm2AccumType<16>; - -struct ABSTy { - static AccumType compute(AccumType x) { - return Sqrt::invoke(x); - } -}; - -struct SQRTTy { - static AccumType compute(AccumType x) { - return Sqrt::invoke(x); - } -}; - -using Float128Norm2Accumulator = Norm2Accumulator<16, ABSTy, SQRTTy>; -} // namespace namespace Fortran::runtime { extern "C" { +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(Norm2_16)( const Descriptor &x, const char *source, int line, int dim) { - auto accumulator{::Float128Norm2Accumulator(x)}; return GetTotalReduction( - x, source, line, dim, nullptr, accumulator, "NORM2"); + x, source, line, dim, nullptr, Norm2Accumulator<16>{x}, "NORM2"); } void RTDEF(Norm2DimReal16)(Descriptor &result, const Descriptor &x, int dim, @@ -49,11 +27,9 @@ void RTDEF(Norm2DimReal16)(Descriptor &result, const Descriptor &x, int dim, RUNTIME_CHECK(terminator, type); RUNTIME_CHECK( terminator, type->first == TypeCategory::Real && type->second == 16); - DoMaxMinNorm2( - result, x, dim, nullptr, "NORM2", terminator); + Norm2Helper<16>{}(result, x, dim, nullptr, terminator); } +#endif } // extern "C" } // namespace Fortran::runtime - -#endif diff --git a/flang/runtime/Float128Math/numeric-template-specs.h b/flang/runtime/Float128Math/numeric-template-specs.h new file mode 100644 index 00000000000000..a0a77230c3e9eb --- /dev/null +++ 
b/flang/runtime/Float128Math/numeric-template-specs.h @@ -0,0 +1,55 @@ +//===-- runtime/Float128Math/numeric-template-specs.h -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_FLOAT128MATH_NUMERIC_TEMPLATE_SPECS_H_ +#define FORTRAN_RUNTIME_FLOAT128MATH_NUMERIC_TEMPLATE_SPECS_H_ + +#include "math-entries.h" +#include "numeric-templates.h" + +namespace Fortran::runtime { +using F128Type = CppTypeFor; + +template <> struct ABSTy { + static F128Type compute(F128Type x) { return Abs::invoke(x); } +}; + +template <> struct FREXPTy { + static F128Type compute(F128Type x, int *e) { + return Frexp::invoke(x, e); + } +}; + +template <> struct ILOGBTy { + static int compute(F128Type x) { return Ilogb::invoke(x); } +}; + +template <> struct ISINFTy { + static bool compute(F128Type x) { return Isinf::invoke(x); } +}; + +template <> struct ISNANTy { + static bool compute(F128Type x) { return Isnan::invoke(x); } +}; + +template <> struct LDEXPTy { + template static F128Type compute(F128Type x, ET p) { + return Ldexp::invoke(x, p); + } +}; + +template <> struct QNANTy { + static F128Type compute() { return F128_RT_QNAN; } +}; + +template <> struct SQRTTy { + static F128Type compute(F128Type x) { return Sqrt::invoke(x); } +}; + +} // namespace Fortran::runtime +#endif // FORTRAN_RUNTIME_FLOAT128MATH_NUMERIC_TEMPLATE_SPECS_H_ diff --git a/flang/runtime/Float128Math/pow.cpp b/flang/runtime/Float128Math/pow.cpp index 02958a890e5221..7a48828ee3e765 100644 --- a/flang/runtime/Float128Math/pow.cpp +++ b/flang/runtime/Float128Math/pow.cpp @@ -15,7 +15,7 @@ extern "C" { CppTypeFor RTDEF(PowF128)( CppTypeFor x, CppTypeFor y) { - return Pow::invoke(x, y); + return Pow::invoke(x, y); } #endif diff 
--git a/flang/runtime/Float128Math/round.cpp b/flang/runtime/Float128Math/round.cpp index 43ab57768cb77a..6420c1bc9cd25d 100644 --- a/flang/runtime/Float128Math/round.cpp +++ b/flang/runtime/Float128Math/round.cpp @@ -18,7 +18,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(RoundF128)( CppTypeFor x) { - return Round::invoke(x); + return Round::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/rrspacing.cpp b/flang/runtime/Float128Math/rrspacing.cpp new file mode 100644 index 00000000000000..feddac418eec39 --- /dev/null +++ b/flang/runtime/Float128Math/rrspacing.cpp @@ -0,0 +1,21 @@ +//===-- runtime/Float128Math/rrspacing.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// RRSPACING (16.9.164) +F128Type RTDEF(RRSpacing16)(F128Type x) { return RRSpacing<113>(x); } +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/scale.cpp b/flang/runtime/Float128Math/scale.cpp new file mode 100644 index 00000000000000..0be958bd9f2a72 --- /dev/null +++ b/flang/runtime/Float128Math/scale.cpp @@ -0,0 +1,28 @@ +//===-- runtime/Float128Math/scale.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" +#include + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +F128Type RTDEF(Scale16)(F128Type x, std::int64_t p) { + auto ip{static_cast(p)}; + if (ip != p) { + ip = p < 0 ? std::numeric_limits::min() + : std::numeric_limits::max(); + } + return LDEXPTy::compute(x, ip); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/set-exponent.cpp b/flang/runtime/Float128Math/set-exponent.cpp new file mode 100644 index 00000000000000..99c34af7962b9a --- /dev/null +++ b/flang/runtime/Float128Math/set-exponent.cpp @@ -0,0 +1,23 @@ +//===-- runtime/Float128Math/set-exponent.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// SET_EXPONENT (16.9.171) +F128Type RTDEF(SetExponent16)(F128Type x, std::int64_t p) { + return SetExponent(x, p); +} +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/sin.cpp b/flang/runtime/Float128Math/sin.cpp index 013eb9d119a6a3..8ebc3f9881586e 100644 --- a/flang/runtime/Float128Math/sin.cpp +++ b/flang/runtime/Float128Math/sin.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(SinF128)( CppTypeFor x) { - return Sin::invoke(x); + return Sin::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/sinh.cpp b/flang/runtime/Float128Math/sinh.cpp index 9c907041fd7eb4..aa716a3e51ef5a 100644 --- a/flang/runtime/Float128Math/sinh.cpp +++ b/flang/runtime/Float128Math/sinh.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(SinhF128)( CppTypeFor x) { - return Sinh::invoke(x); + return Sinh::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/spacing.cpp b/flang/runtime/Float128Math/spacing.cpp new file mode 100644 index 00000000000000..a86c0b30e567ab --- /dev/null +++ b/flang/runtime/Float128Math/spacing.cpp @@ -0,0 +1,21 @@ +//===-- runtime/Float128Math/spacing.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math-entries.h" +#include "numeric-template-specs.h" + +namespace Fortran::runtime { +extern "C" { + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +// SPACING (16.9.180) +F128Type RTDEF(Spacing16)(F128Type x) { return Spacing<113>(x); } +#endif + +} // extern "C" +} // namespace Fortran::runtime diff --git a/flang/runtime/Float128Math/sqrt.cpp b/flang/runtime/Float128Math/sqrt.cpp index aafbd850ca973a..83165a4c623191 100644 --- a/flang/runtime/Float128Math/sqrt.cpp +++ b/flang/runtime/Float128Math/sqrt.cpp @@ -7,15 +7,13 @@ //===----------------------------------------------------------------------===// #include "math-entries.h" +#include "numeric-template-specs.h" namespace Fortran::runtime { extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -CppTypeFor RTDEF(SqrtF128)( - CppTypeFor x) { - return Sqrt::invoke(x); -} +F128Type RTDEF(SqrtF128)(F128Type x) { return SQRTTy::compute(x); } #endif } // extern "C" diff --git a/flang/runtime/Float128Math/tan.cpp b/flang/runtime/Float128Math/tan.cpp index 01d3c7bdd2e85d..8f4b723ca977bd 100644 --- a/flang/runtime/Float128Math/tan.cpp +++ b/flang/runtime/Float128Math/tan.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(TanF128)( CppTypeFor x) { - return Tan::invoke(x); + return Tan::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/tanh.cpp b/flang/runtime/Float128Math/tanh.cpp index fedc1a4120caf5..b43a89520b6797 100644 --- a/flang/runtime/Float128Math/tanh.cpp +++ b/flang/runtime/Float128Math/tanh.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(TanhF128)( CppTypeFor x) { - return Tanh::invoke(x); + return Tanh::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/tgamma.cpp b/flang/runtime/Float128Math/tgamma.cpp index 329defff38cf91..93f97800bdc966 100644 --- 
a/flang/runtime/Float128Math/tgamma.cpp +++ b/flang/runtime/Float128Math/tgamma.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(TgammaF128)( CppTypeFor x) { - return Tgamma::invoke(x); + return Tgamma::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/trunc.cpp b/flang/runtime/Float128Math/trunc.cpp index 3cab219ce31c2d..ca15a739c030e8 100644 --- a/flang/runtime/Float128Math/trunc.cpp +++ b/flang/runtime/Float128Math/trunc.cpp @@ -18,7 +18,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(TruncF128)( CppTypeFor x) { - return Trunc::invoke(x); + return Trunc::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/y0.cpp b/flang/runtime/Float128Math/y0.cpp index f3e2ee454aeab5..d6f39aac1053a8 100644 --- a/flang/runtime/Float128Math/y0.cpp +++ b/flang/runtime/Float128Math/y0.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(Y0F128)( CppTypeFor x) { - return Y0::invoke(x); + return Y0::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/y1.cpp b/flang/runtime/Float128Math/y1.cpp index c117bbcb2b5a86..477d36a9ea3c66 100644 --- a/flang/runtime/Float128Math/y1.cpp +++ b/flang/runtime/Float128Math/y1.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(Y1F128)( CppTypeFor x) { - return Y1::invoke(x); + return Y1::invoke(x); } #endif diff --git a/flang/runtime/Float128Math/yn.cpp b/flang/runtime/Float128Math/yn.cpp index 237bc2866a0d5b..3a040cc8858970 100644 --- a/flang/runtime/Float128Math/yn.cpp +++ b/flang/runtime/Float128Math/yn.cpp @@ -14,7 +14,7 @@ extern "C" { #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 CppTypeFor RTDEF(YnF128)( int n, CppTypeFor x) { - return Yn::invoke(n, x); + return Yn::invoke(n, x); } #endif diff --git a/flang/runtime/extrema.cpp b/flang/runtime/extrema.cpp index fc2b4e165cb269..61afb0458430db 100644 --- a/flang/runtime/extrema.cpp +++ b/flang/runtime/extrema.cpp @@ -424,62 
+424,6 @@ RT_EXT_API_GROUP_END // MAXVAL and MINVAL -template -struct MaxOrMinIdentity { - using Type = CppTypeFor; - static constexpr RT_API_ATTRS Type Value() { - return IS_MAXVAL ? std::numeric_limits::lowest() - : std::numeric_limits::max(); - } -}; - -// std::numeric_limits<> may not know int128_t -template -struct MaxOrMinIdentity { - using Type = CppTypeFor; - static constexpr RT_API_ATTRS Type Value() { - return IS_MAXVAL ? Type{1} << 127 : ~Type{0} >> 1; - } -}; - -#if HAS_FLOAT128 -// std::numeric_limits<> may not support __float128. -// -// Usage of GCC quadmath.h's FLT128_MAX is complicated by the fact that -// even GCC complains about 'Q' literal suffix under -Wpedantic. -// We just recreate FLT128_MAX ourselves. -// -// This specialization must engage only when -// CppTypeFor is __float128. -template -struct MaxOrMinIdentity, __float128>>> { - using Type = __float128; - static RT_API_ATTRS Type Value() { - // Create a buffer to store binary representation of __float128 constant. - constexpr std::size_t alignment = - std::max(alignof(Type), alignof(std::uint64_t)); - alignas(alignment) char data[sizeof(Type)]; - - // First, verify that our interpretation of __float128 format is correct, - // e.g. by checking at least one known constant. - *reinterpret_cast(data) = Type(1.0); - if (*reinterpret_cast(data) != 0 || - *(reinterpret_cast(data) + 1) != 0x3FFF000000000000) { - Terminator terminator{__FILE__, __LINE__}; - terminator.Crash("not yet implemented: no full support for __float128"); - } - - // Recreate FLT128_MAX. - *reinterpret_cast(data) = 0xFFFFFFFFFFFFFFFF; - *(reinterpret_cast(data) + 1) = 0x7FFEFFFFFFFFFFFF; - Type max = *reinterpret_cast(data); - return IS_MAXVAL ? 
-max : max; - } -}; -#endif // HAS_FLOAT128 - template class NumericExtremumAccumulator { public: @@ -773,42 +717,25 @@ RT_EXT_API_GROUP_END // NORM2 -template struct Norm2Helper { - RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim, - const Descriptor *mask, Terminator &terminator) const { - DoMaxMinNorm2::Type>( - result, x, dim, mask, "NORM2", terminator); - } -}; - extern "C" { RT_EXT_API_GROUP_BEGIN // TODO: REAL(2 & 3) CppTypeFor RTDEF(Norm2_4)( const Descriptor &x, const char *source, int line, int dim) { - return GetTotalReduction(x, source, line, dim, nullptr, - Norm2AccumulatorGetter<4>::create(x), "NORM2"); + return GetTotalReduction( + x, source, line, dim, nullptr, Norm2Accumulator<4>{x}, "NORM2"); } CppTypeFor RTDEF(Norm2_8)( const Descriptor &x, const char *source, int line, int dim) { - return GetTotalReduction(x, source, line, dim, nullptr, - Norm2AccumulatorGetter<8>::create(x), "NORM2"); + return GetTotalReduction( + x, source, line, dim, nullptr, Norm2Accumulator<8>{x}, "NORM2"); } #if LDBL_MANT_DIG == 64 CppTypeFor RTDEF(Norm2_10)( const Descriptor &x, const char *source, int line, int dim) { - return GetTotalReduction(x, source, line, dim, - nullptr, Norm2AccumulatorGetter<10>::create(x), "NORM2"); -} -#endif -#if LDBL_MANT_DIG == 113 -// The __float128 implementation resides in FortranFloat128Math library. 
-CppTypeFor RTDEF(Norm2_16)( - const Descriptor &x, const char *source, int line, int dim) { - return GetTotalReduction(x, source, line, dim, - nullptr, Norm2AccumulatorGetter<16>::create(x), "NORM2"); + return GetTotalReduction( + x, source, line, dim, nullptr, Norm2Accumulator<10>{x}, "NORM2"); } #endif diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h new file mode 100644 index 00000000000000..b16440dbc2241a --- /dev/null +++ b/flang/runtime/numeric-templates.h @@ -0,0 +1,339 @@ +//===-- runtime/numeric-templates.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Generic class and function templates used for implementing +// various numeric intrinsics (EXPONENT, FRACTION, etc.). +// +// This header file also defines generic templates for "basic" +// math operations like abs, isnan, etc. The Float128Math +// library provides specializations for these templates +// for the data type corresponding to CppTypeFor +// on the target. + +#ifndef FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_ +#define FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_ + +#include "terminator.h" +#include "tools.h" +#include "flang/Common/float128.h" +#include +#include + +namespace Fortran::runtime { + +// MAX/MIN/LOWEST values for different data types. + +// MaxOrMinIdentity returns MAX or LOWEST value of the given type. +template +struct MaxOrMinIdentity { + using Type = CppTypeFor; + static constexpr RT_API_ATTRS Type Value() { + return IS_MAXVAL ? 
std::numeric_limits::lowest() + : std::numeric_limits::max(); + } +}; + +// std::numeric_limits<> may not know int128_t +template +struct MaxOrMinIdentity { + using Type = CppTypeFor; + static constexpr RT_API_ATTRS Type Value() { + return IS_MAXVAL ? Type{1} << 127 : ~Type{0} >> 1; + } +}; + +#if HAS_FLOAT128 +// std::numeric_limits<> may not support __float128. +// +// Usage of GCC quadmath.h's FLT128_MAX is complicated by the fact that +// even GCC complains about 'Q' literal suffix under -Wpedantic. +// We just recreate FLT128_MAX ourselves. +// +// This specialization must engage only when +// CppTypeFor is __float128. +template +struct MaxOrMinIdentity, __float128>>> { + using Type = __float128; + static RT_API_ATTRS Type Value() { + // Create a buffer to store binary representation of __float128 constant. + constexpr std::size_t alignment = + std::max(alignof(Type), alignof(std::uint64_t)); + alignas(alignment) char data[sizeof(Type)]; + + // First, verify that our interpretation of __float128 format is correct, + // e.g. by checking at least one known constant. + *reinterpret_cast(data) = Type(1.0); + if (*reinterpret_cast(data) != 0 || + *(reinterpret_cast(data) + 1) != 0x3FFF000000000000) { + Terminator terminator{__FILE__, __LINE__}; + terminator.Crash("not yet implemented: no full support for __float128"); + } + + // Recreate FLT128_MAX. + *reinterpret_cast(data) = 0xFFFFFFFFFFFFFFFF; + *(reinterpret_cast(data) + 1) = 0x7FFEFFFFFFFFFFFF; + Type max = *reinterpret_cast(data); + return IS_MAXVAL ? -max : max; + } +}; +#endif // HAS_FLOAT128 + +// Minimum finite representable value. +// For floating-point types, returns minimum positive normalized value. +template struct MinValue { + static RT_API_ATTRS T get() { return std::numeric_limits::min(); } +}; + +#if HAS_FLOAT128 +template <> struct MinValue> { + using Type = CppTypeFor; + static RT_API_ATTRS Type get() { + // Create a buffer to store binary representation of __float128 constant. 
+ constexpr std::size_t alignment = + std::max(alignof(Type), alignof(std::uint64_t)); + alignas(alignment) char data[sizeof(Type)]; + + // First, verify that our interpretation of __float128 format is correct, + // e.g. by checking at least one known constant. + *reinterpret_cast(data) = Type(1.0); + if (*reinterpret_cast(data) != 0 || + *(reinterpret_cast(data) + 1) != 0x3FFF000000000000) { + Terminator terminator{__FILE__, __LINE__}; + terminator.Crash("not yet implemented: no full support for __float128"); + } + + // Recreate FLT128_MIN. + *reinterpret_cast(data) = 0; + *(reinterpret_cast(data) + 1) = 0x1000000000000; + return *reinterpret_cast(data); + } +}; +#endif // HAS_FLOAT128 + +template struct ABSTy { + static constexpr RT_API_ATTRS T compute(T x) { return std::abs(x); } +}; + +template struct FREXPTy { + static constexpr RT_API_ATTRS T compute(T x, int *e) { + return std::frexp(x, e); + } +}; + +template struct ILOGBTy { + static constexpr RT_API_ATTRS int compute(T x) { return std::ilogb(x); } +}; + +template struct ISINFTy { + static constexpr RT_API_ATTRS bool compute(T x) { return std::isinf(x); } +}; + +template struct ISNANTy { + static constexpr RT_API_ATTRS bool compute(T x) { return std::isnan(x); } +}; + +template struct LDEXPTy { + template static constexpr RT_API_ATTRS T compute(T x, ET e) { + return std::ldexp(x, e); + } +}; + +template struct MAXTy { + static constexpr RT_API_ATTRS T compute() { + return std::numeric_limits::max(); + } +}; + +#if LDBL_MANT_DIG == 113 || HAS_FLOAT128 +template <> struct MAXTy> { + static CppTypeFor compute() { + return MaxOrMinIdentity::Value(); + } +}; +#endif + +template struct MINTy { + static constexpr RT_API_ATTRS T compute() { return MinValue::get(); } +}; + +template struct QNANTy { + static constexpr RT_API_ATTRS T compute() { + return std::numeric_limits::quiet_NaN(); + } +}; + +template struct SQRTTy { + static constexpr RT_API_ATTRS T compute(T x) { return std::sqrt(x); } +}; + +// EXPONENT 
(16.9.75) +template +inline RT_API_ATTRS RESULT Exponent(ARG x) { + if (ISINFTy::compute(x) || ISNANTy::compute(x)) { + return MAXTy::compute(); // +/-Inf, NaN -> HUGE(0) + } else if (x == 0) { + return 0; // 0 -> 0 + } else { + return ILOGBTy::compute(x) + 1; + } +} + +// Suppress the warnings about calling __host__-only std::frexp, +// defined in C++ STD header files, from __device__ code. +RT_DIAG_PUSH +RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN + +// FRACTION (16.9.80) +template inline RT_API_ATTRS T Fraction(T x) { + if (ISNANTy::compute(x)) { + return x; // NaN -> same NaN + } else if (ISINFTy::compute(x)) { + return QNANTy::compute(); // +/-Inf -> NaN + } else if (x == 0) { + return x; // 0 -> same 0 + } else { + int ignoredExp; + return FREXPTy::compute(x, &ignoredExp); + } +} + +RT_DIAG_POP + +// SET_EXPONENT (16.9.171) +template inline RT_API_ATTRS T SetExponent(T x, std::int64_t p) { + if (ISNANTy::compute(x)) { + return x; // NaN -> same NaN + } else if (ISINFTy::compute(x)) { + return QNANTy::compute(); // +/-Inf -> NaN + } else if (x == 0) { + return x; // return negative zero if x is negative zero + } else { + int expo{ILOGBTy::compute(x) + 1}; + auto ip{static_cast(p - expo)}; + if (ip != p - expo) { + ip = p < 0 ? std::numeric_limits::min() + : std::numeric_limits::max(); + } + return LDEXPTy::compute(x, ip); // x*2**(p-e) + } +} + +// MOD & MODULO (16.9.135, .136) +template +inline RT_API_ATTRS T RealMod( + T a, T p, const char *sourceFile, int sourceLine) { + if (p == 0) { + Terminator{sourceFile, sourceLine}.Crash( + IS_MODULO ? 
"MODULO with P==0" : "MOD with P==0"); + } + if (ISNANTy::compute(a) || ISNANTy::compute(p) || + ISINFTy::compute(a)) { + return QNANTy::compute(); + } else if (ISINFTy::compute(p)) { + return a; + } + T aAbs{ABSTy::compute(a)}; + T pAbs{ABSTy::compute(p)}; + if (aAbs <= static_cast(std::numeric_limits::max()) && + pAbs <= static_cast(std::numeric_limits::max())) { + if (auto aInt{static_cast(a)}; a == aInt) { + if (auto pInt{static_cast(p)}; p == pInt) { + // Fast exact case for integer operands + auto mod{aInt - (aInt / pInt) * pInt}; + if (IS_MODULO && (aInt > 0) != (pInt > 0)) { + mod += pInt; + } + return static_cast(mod); + } + } + } + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + // std::fmod() semantics on signed operands seems to match + // the requirements of MOD(). MODULO() needs adjustment. + T result{std::fmod(a, p)}; + if constexpr (IS_MODULO) { + if ((a < 0) != (p < 0)) { + if (result == 0.) { + result = -result; + } else { + result += p; + } + } + } + return result; + } else { + // The standard defines MOD(a,p)=a-AINT(a/p)*p and + // MODULO(a,p)=a-FLOOR(a/p)*p, but those definitions lose + // precision badly due to cancellation when ABS(a) is + // much larger than ABS(p). + // Insights: + // - MOD(a,p)=MOD(a-n*p,p) when a>0, p>0, integer n>0, and a>=n*p + // - when n is a power of two, n*p is exact + // - as a>=n*p, a-n*p does not round. + // So repeatedly reduce a by all n*p in decreasing order of n; + // what's left is the desired remainder. This is basically + // the same algorithm as arbitrary precision binary long division, + // discarding the quotient. 
+ T tmp{aAbs}; + for (T adj{SetExponent(pAbs, Exponent(aAbs))}; tmp >= pAbs; adj /= 2) { + if (tmp >= adj) { + tmp -= adj; + if (tmp == 0) { + break; + } + } + } + if (a < 0) { + tmp = -tmp; + } + if constexpr (IS_MODULO) { + if ((a < 0) != (p < 0)) { + tmp += p; + } + } + return tmp; + } +} + +// RRSPACING (16.9.164) +template inline RT_API_ATTRS T RRSpacing(T x) { + if (ISNANTy::compute(x)) { + return x; // NaN -> same NaN + } else if (ISINFTy::compute(x)) { + return QNANTy::compute(); // +/-Inf -> NaN + } else if (x == 0) { + return 0; // 0 -> 0 + } else { + return LDEXPTy::compute( + ABSTy::compute(x), PREC - (ILOGBTy::compute(x) + 1)); + } +} + +// SPACING (16.9.180) +template inline RT_API_ATTRS T Spacing(T x) { + if (ISNANTy::compute(x)) { + return x; // NaN -> same NaN + } else if (ISINFTy::compute(x)) { + return QNANTy::compute(); // +/-Inf -> NaN + } else if (x == 0) { + // The standard-mandated behavior seems broken, since TINY() can't be + // subnormal. + return MINTy::compute(); // 0 -> TINY(x) + } else { + T result{LDEXPTy::compute( + static_cast(1.0), ILOGBTy::compute(x) + 1 - PREC)}; // 2**(e-p) + return result == 0 ? 
/*TINY(x)*/ MINTy::compute() : result; + } +} + +} // namespace Fortran::runtime + +#endif // FORTRAN_RUNTIME_NUMERIC_TEMPLATES_H_ diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp index ede00d69f20e25..abd3e500029fe4 100644 --- a/flang/runtime/numeric.cpp +++ b/flang/runtime/numeric.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/numeric.h" +#include "numeric-templates.h" #include "terminator.h" #include "flang/Common/float128.h" #include @@ -68,58 +69,6 @@ inline RT_API_ATTRS RESULT Floor(ARG x) { return std::floor(x); } -// EXPONENT (16.9.75) -template -inline RT_API_ATTRS RESULT Exponent(ARG x) { - if (std::isinf(x) || std::isnan(x)) { - return std::numeric_limits::max(); // +/-Inf, NaN -> HUGE(0) - } else if (x == 0) { - return 0; // 0 -> 0 - } else { - return std::ilogb(x) + 1; - } -} - -// Suppress the warnings about calling __host__-only std::frexp, -// defined in C++ STD header files, from __device__ code. -RT_DIAG_PUSH -RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN - -// FRACTION (16.9.80) -template inline RT_API_ATTRS T Fraction(T x) { - if (std::isnan(x)) { - return x; // NaN -> same NaN - } else if (std::isinf(x)) { - return std::numeric_limits::quiet_NaN(); // +/-Inf -> NaN - } else if (x == 0) { - return x; // 0 -> same 0 - } else { - int ignoredExp; - return std::frexp(x, &ignoredExp); - } -} - -RT_DIAG_POP - -// SET_EXPONENT (16.9.171) -template inline RT_API_ATTRS T SetExponent(T x, std::int64_t p) { - if (std::isnan(x)) { - return x; // NaN -> same NaN - } else if (std::isinf(x)) { - return std::numeric_limits::quiet_NaN(); // +/-Inf -> NaN - } else if (x == 0) { - return x; // return negative zero if x is negative zero - } else { - int expo{std::ilogb(x) + 1}; - auto ip{static_cast(p - expo)}; - if (ip != p - expo) { - ip = p < 0 ? 
std::numeric_limits::min() - : std::numeric_limits::max(); - } - return std::ldexp(x, ip); // x*2**(p-e) - } -} - // MOD & MODULO (16.9.135, .136) template inline RT_API_ATTRS T IntMod(T x, T p, const char *sourceFile, int sourceLine) { @@ -133,94 +82,6 @@ inline RT_API_ATTRS T IntMod(T x, T p, const char *sourceFile, int sourceLine) { } return mod; } -template -inline RT_API_ATTRS T RealMod( - T a, T p, const char *sourceFile, int sourceLine) { - if (p == 0) { - Terminator{sourceFile, sourceLine}.Crash( - IS_MODULO ? "MODULO with P==0" : "MOD with P==0"); - } - if (std::isnan(a) || std::isnan(p) || std::isinf(a)) { - return std::numeric_limits::quiet_NaN(); - } else if (std::isinf(p)) { - return a; - } - T aAbs{std::abs(a)}; - T pAbs{std::abs(p)}; - if (aAbs <= static_cast(std::numeric_limits::max()) && - pAbs <= static_cast(std::numeric_limits::max())) { - if (auto aInt{static_cast(a)}; a == aInt) { - if (auto pInt{static_cast(p)}; p == pInt) { - // Fast exact case for integer operands - auto mod{aInt - (aInt / pInt) * pInt}; - if (IS_MODULO && (aInt > 0) != (pInt > 0)) { - mod += pInt; - } - return static_cast(mod); - } - } - } - if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { - // std::fmod() semantics on signed operands seems to match - // the requirements of MOD(). MODULO() needs adjustment. - T result{std::fmod(a, p)}; - if constexpr (IS_MODULO) { - if ((a < 0) != (p < 0)) { - if (result == 0.) { - result = -result; - } else { - result += p; - } - } - } - return result; - } else { - // The standard defines MOD(a,p)=a-AINT(a/p)*p and - // MODULO(a,p)=a-FLOOR(a/p)*p, but those definitions lose - // precision badly due to cancellation when ABS(a) is - // much larger than ABS(p). - // Insights: - // - MOD(a,p)=MOD(a-n*p,p) when a>0, p>0, integer n>0, and a>=n*p - // - when n is a power of two, n*p is exact - // - as a>=n*p, a-n*p does not round. 
- // So repeatedly reduce a by all n*p in decreasing order of n; - // what's left is the desired remainder. This is basically - // the same algorithm as arbitrary precision binary long division, - // discarding the quotient. - T tmp{aAbs}; - for (T adj{SetExponent(pAbs, Exponent(aAbs))}; tmp >= pAbs; adj /= 2) { - if (tmp >= adj) { - tmp -= adj; - if (tmp == 0) { - break; - } - } - } - if (a < 0) { - tmp = -tmp; - } - if constexpr (IS_MODULO) { - if ((a < 0) != (p < 0)) { - tmp += p; - } - } - return tmp; - } -} - -// RRSPACING (16.9.164) -template inline RT_API_ATTRS T RRSpacing(T x) { - if (std::isnan(x)) { - return x; // NaN -> same NaN - } else if (std::isinf(x)) { - return std::numeric_limits::quiet_NaN(); // +/-Inf -> NaN - } else if (x == 0) { - return 0; // 0 -> 0 - } else { - return std::ldexp(std::abs(x), PREC - (std::ilogb(x) + 1)); - } -} // SCALE (16.9.166) template inline RT_API_ATTRS T Scale(T x, std::int64_t p) { @@ -229,7 +90,7 @@ template inline RT_API_ATTRS T Scale(T x, std::int64_t p) { ip = p < 0 ? std::numeric_limits::min() : std::numeric_limits::max(); } - return std::ldexp(x, p); // x*2**p + return std::ldexp(x, ip); // x*2**p } // SELECTED_INT_KIND (16.9.169) @@ -300,23 +161,6 @@ inline RT_API_ATTRS CppTypeFor SelectedRealKind( return error ? error : kind; } -// SPACING (16.9.180) -template inline RT_API_ATTRS T Spacing(T x) { - if (std::isnan(x)) { - return x; // NaN -> same NaN - } else if (std::isinf(x)) { - return std::numeric_limits::quiet_NaN(); // +/-Inf -> NaN - } else if (x == 0) { - // The standard-mandated behavior seems broken, since TINY() can't be - // subnormal. - return std::numeric_limits::min(); // 0 -> TINY(x) - } else { - T result{ - std::ldexp(static_cast(1.0), std::ilogb(x) + 1 - PREC)}; // 2**(e-p) - return result == 0 ? 
/*TINY(x)*/ std::numeric_limits::min() : result; - } -} - // NEAREST (16.9.139) template inline RT_API_ATTRS T Nearest(T x, bool positive) { @@ -480,15 +324,6 @@ CppTypeFor RTDEF(Exponent10_8)( CppTypeFor x) { return Exponent>(x); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(Exponent16_4)( - CppTypeFor x) { - return Exponent>(x); -} -CppTypeFor RTDEF(Exponent16_8)( - CppTypeFor x) { - return Exponent>(x); -} #endif CppTypeFor RTDEF(Floor4_1)( @@ -596,11 +431,6 @@ CppTypeFor RTDEF(Fraction10)( CppTypeFor x) { return Fraction(x); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(Fraction16)( - CppTypeFor x) { - return Fraction(x); -} #endif bool RTDEF(IsFinite4)(CppTypeFor x) { @@ -683,12 +513,6 @@ CppTypeFor RTDEF(ModReal10)( const char *sourceFile, int sourceLine) { return RealMod(x, p, sourceFile, sourceLine); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(ModReal16)( - CppTypeFor x, CppTypeFor p, - const char *sourceFile, int sourceLine) { - return RealMod(x, p, sourceFile, sourceLine); -} #endif CppTypeFor RTDEF(ModuloInteger1)( @@ -739,12 +563,6 @@ CppTypeFor RTDEF(ModuloReal10)( const char *sourceFile, int sourceLine) { return RealMod(x, p, sourceFile, sourceLine); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(ModuloReal16)( - CppTypeFor x, CppTypeFor p, - const char *sourceFile, int sourceLine) { - return RealMod(x, p, sourceFile, sourceLine); -} #endif CppTypeFor RTDEF(Nearest4)( @@ -760,11 +578,6 @@ CppTypeFor RTDEF(Nearest10)( CppTypeFor x, bool positive) { return Nearest<64>(x, positive); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(Nearest16)( - CppTypeFor x, bool positive) { - return Nearest<113>(x, positive); -} #endif CppTypeFor RTDEF(Nint4_1)( @@ -872,11 +685,6 @@ CppTypeFor RTDEF(RRSpacing10)( CppTypeFor x) { return RRSpacing<64>(x); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(RRSpacing16)( - CppTypeFor x) { - return RRSpacing<113>(x); -} #endif CppTypeFor RTDEF(SetExponent4)( @@ -892,11 +700,6 @@ CppTypeFor RTDEF(SetExponent10)( 
CppTypeFor x, std::int64_t p) { return SetExponent(x, p); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(SetExponent16)( - CppTypeFor x, std::int64_t p) { - return SetExponent(x, p); -} #endif CppTypeFor RTDEF(Scale4)( @@ -912,11 +715,6 @@ CppTypeFor RTDEF(Scale10)( CppTypeFor x, std::int64_t p) { return Scale(x, p); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(Scale16)( - CppTypeFor x, std::int64_t p) { - return Scale(x, p); -} #endif // SELECTED_INT_KIND @@ -971,11 +769,6 @@ CppTypeFor RTDEF(Spacing10)( CppTypeFor x) { return Spacing<64>(x); } -#elif LDBL_MANT_DIG == 113 -CppTypeFor RTDEF(Spacing16)( - CppTypeFor x) { - return Spacing<113>(x); -} #endif CppTypeFor RTDEF(FPow4i)( diff --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h index 0891bc021ff753..5b793deb2a123d 100644 --- a/flang/runtime/reduction-templates.h +++ b/flang/runtime/reduction-templates.h @@ -1,4 +1,4 @@ -//===-- runtime/reduction-templates.h -------------------------------------===// +//===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -21,6 +21,7 @@ #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ +#include "numeric-templates.h" #include "terminator.h" #include "tools.h" #include "flang/Runtime/cpp-type.h" @@ -385,7 +386,7 @@ template using Norm2AccumType = CppTypeFor; -template class Norm2Accumulator { +template class Norm2Accumulator { public: using Type = CppTypeFor; using AccumType = Norm2AccumType; @@ -395,10 +396,10 @@ template class Norm2Accumulator { template RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { // m * sqrt(1 + sum((others(:)/m)**2)) - *p = static_cast(max_ * SQRT::compute(1 + sum_)); + *p = static_cast(max_ * SQRTTy::compute(1 + sum_)); } RT_API_ATTRS bool Accumulate(Type x) { - auto absX{ABS::compute(static_cast(x))}; + auto absX{ABSTy::compute(static_cast(x))}; if (!max_) { max_ = absX; } else if (absX > max_) { @@ -424,27 +425,12 @@ template class Norm2Accumulator { AccumType sum_{0}; // sum((others(:)/m)**2) }; -// Helper class for creating Norm2Accumulator instance -// based on the given KIND. This helper returns and instance -// that uses std::abs and std::sqrt for the computations. 
-template class Norm2AccumulatorGetter { - using AccumType = Norm2AccumType; - -public: - struct ABSTy { - static constexpr RT_API_ATTRS AccumType compute(AccumType &&x) { - return std::abs(std::forward(x)); - } - }; - struct SQRTTy { - static constexpr RT_API_ATTRS AccumType compute(AccumType &&x) { - return std::sqrt(std::forward(x)); - } - }; - - using Type = Norm2Accumulator; - - static RT_API_ATTRS Type create(const Descriptor &x) { return Type(x); } +template struct Norm2Helper { + RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim, + const Descriptor *mask, Terminator &terminator) const { + DoMaxMinNorm2>( + result, x, dim, mask, "NORM2", terminator); + } }; } // namespace Fortran::runtime diff --git a/libc/benchmarks/automemcpy/lib/CMakeLists.txt b/libc/benchmarks/automemcpy/lib/CMakeLists.txt index bb6a5631f2c3f6..e66b9045b6074a 100644 --- a/libc/benchmarks/automemcpy/lib/CMakeLists.txt +++ b/libc/benchmarks/automemcpy/lib/CMakeLists.txt @@ -19,7 +19,7 @@ add_custom_command( add_library(automemcpy_implementations "${Implementations}") target_link_libraries(automemcpy_implementations PUBLIC LLVMSupport libc-memory-benchmark) target_include_directories(automemcpy_implementations PRIVATE - ${LIBC_SOURCE_DIR} ${LIBC_SOURCE_DIR}/include ${LIBC_AUTOMEMCPY_INCLUDE_DIR}) + ${LIBC_SOURCE_DIR} ${LIBC_AUTOMEMCPY_INCLUDE_DIR}) target_compile_options(automemcpy_implementations PRIVATE ${LIBC_COMPILE_OPTIONS_NATIVE} "SHELL:-mllvm -combiner-global-alias-analysis" -fno-builtin) llvm_update_compile_flags(automemcpy_implementations) diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake index 5469799f023983..0649e9f7a76709 100644 --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -59,7 +59,6 @@ function(create_object_library fq_target_name) ) target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) 
target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) target_compile_options(${fq_target_name} PRIVATE ${compile_options}) # The NVPTX target is installed as LLVM-IR but the internal testing toolchain @@ -74,7 +73,6 @@ function(create_object_library fq_target_name) ) target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) target_compile_options(${internal_target_name} PRIVATE ${compile_options} -fno-lto -march=${LIBC_GPU_TARGET_ARCHITECTURE}) endif() @@ -281,7 +279,6 @@ function(create_entrypoint_object fq_target_name) target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options}) target_include_directories(${internal_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${internal_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) add_dependencies(${internal_target_name} ${full_deps_list}) target_link_libraries(${internal_target_name} ${full_deps_list}) @@ -303,7 +300,6 @@ function(create_entrypoint_object fq_target_name) target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING) target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) add_dependencies(${fq_target_name} ${full_deps_list}) target_link_libraries(${fq_target_name} ${full_deps_list}) diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index 5981d427b71f8d..836e15d34741b2 100644 --- 
a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -184,7 +184,6 @@ function(create_libc_unittest fq_target_name) ) target_include_directories(${fq_build_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) target_compile_options(${fq_build_target_name} PRIVATE ${compile_options}) if(NOT LIBC_UNITTEST_CXX_STANDARD) @@ -318,7 +317,6 @@ function(add_libc_fuzzer target_name) ) target_include_directories(${fq_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) target_link_libraries(${fq_target_name} PRIVATE ${link_object_files} @@ -459,7 +457,6 @@ function(add_integration_test test_name) PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(${fq_build_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) _get_hermetic_test_compile_options(compile_options "${INTEGRATION_TEST_COMPILE_OPTIONS}") target_compile_options(${fq_build_target_name} PRIVATE ${compile_options}) @@ -635,7 +632,6 @@ function(add_libc_hermetic_test test_name) _get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}") target_include_directories(${fq_build_target_name} SYSTEM PRIVATE ${LIBC_INCLUDE_DIR}) target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}) - target_include_directories(${fq_build_target_name} PRIVATE ${LIBC_SOURCE_DIR}/include) _get_hermetic_test_compile_options(compile_options "${HERMETIC_TEST_COMPILE_OPTIONS}") target_compile_options(${fq_build_target_name} PRIVATE 
${compile_options}) diff --git a/libc/cmake/modules/compiler_features/check_fixed_point.cpp b/libc/cmake/modules/compiler_features/check_fixed_point.cpp index 9199340fe652ea..a5192697d43f77 100644 --- a/libc/cmake/modules/compiler_features/check_fixed_point.cpp +++ b/libc/cmake/modules/compiler_features/check_fixed_point.cpp @@ -1,4 +1,4 @@ -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #ifndef LIBC_COMPILER_HAS_FIXED_POINT #error unsupported diff --git a/libc/fuzzing/stdio/printf_fixed_conv_fuzz.cpp b/libc/fuzzing/stdio/printf_fixed_conv_fuzz.cpp index c385c3a8f3e44a..b4a8621891203d 100644 --- a/libc/fuzzing/stdio/printf_fixed_conv_fuzz.cpp +++ b/libc/fuzzing/stdio/printf_fixed_conv_fuzz.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "src/stdio/snprintf.h" -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/fixed_point/fx_bits.h" #include "src/__support/fixed_point/fx_rep.h" diff --git a/libc/include/llvm-libc-types/float128.h b/libc/include/llvm-libc-types/float128.h index e2dc18c040d99e..0b290c676ecc02 100644 --- a/libc/include/llvm-libc-types/float128.h +++ b/libc/include/llvm-libc-types/float128.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_TYPES_FLOAT128_H #define LLVM_LIBC_TYPES_FLOAT128_H -#include "llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG +#include "../llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG // Currently, C23 `_Float128` type is only defined as a built-in type in GCC 7 // or later, and only for C. 
For C++, or for clang, `__float128` is defined diff --git a/libc/src/__support/CPP/limits.h b/libc/src/__support/CPP/limits.h index 6440e8cb358fa7..1ffde5f9556f87 100644 --- a/libc/src/__support/CPP/limits.h +++ b/libc/src/__support/CPP/limits.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_CPP_LIMITS_H #define LLVM_LIBC_SRC___SUPPORT_CPP_LIMITS_H -#include "llvm-libc-macros/limits-macros.h" // CHAR_BIT +#include "include/llvm-libc-macros/limits-macros.h" // CHAR_BIT #include "src/__support/CPP/type_traits/is_integral.h" #include "src/__support/CPP/type_traits/is_signed.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE diff --git a/libc/src/__support/CPP/type_traits/is_fixed_point.h b/libc/src/__support/CPP/type_traits/is_fixed_point.h index 09dba8b7ecbcd4..025268bc2979d3 100644 --- a/libc/src/__support/CPP/type_traits/is_fixed_point.h +++ b/libc/src/__support/CPP/type_traits/is_fixed_point.h @@ -12,7 +12,7 @@ #include "src/__support/CPP/type_traits/remove_cv.h" #include "src/__support/macros/attributes.h" -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE::cpp { diff --git a/libc/src/__support/HashTable/table.h b/libc/src/__support/HashTable/table.h index 07fcd42c97f3e7..8f6c5887c189e8 100644 --- a/libc/src/__support/HashTable/table.h +++ b/libc/src/__support/HashTable/table.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_TABLE_H #define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_TABLE_H -#include "llvm-libc-types/ENTRY.h" +#include "include/llvm-libc-types/ENTRY.h" #include "src/__support/CPP/bit.h" // bit_ceil #include "src/__support/CPP/new.h" #include "src/__support/HashTable/bitmask.h" diff --git a/libc/src/__support/RPC/rpc_client.h b/libc/src/__support/RPC/rpc_client.h index 571d7cce2a8039..6e1827dbfeea92 100644 --- a/libc/src/__support/RPC/rpc_client.h +++ b/libc/src/__support/RPC/rpc_client.h @@ -11,7 +11,7 @@ #include "rpc.h" -#include 
"llvm-libc-types/rpc_opcodes_t.h" +#include "include/llvm-libc-types/rpc_opcodes_t.h" namespace LIBC_NAMESPACE { namespace rpc { diff --git a/libc/src/__support/fixed_point/fx_bits.h b/libc/src/__support/fixed_point/fx_bits.h index 6fdbc6f6ece63f..41da45c01e4e19 100644 --- a/libc/src/__support/fixed_point/fx_bits.h +++ b/libc/src/__support/fixed_point/fx_bits.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_BITS_H #define LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_BITS_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE diff --git a/libc/src/__support/fixed_point/fx_rep.h b/libc/src/__support/fixed_point/fx_rep.h index e1fee62f335eb9..f8593a93684cbc 100644 --- a/libc/src/__support/fixed_point/fx_rep.h +++ b/libc/src/__support/fixed_point/fx_rep.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_REP_H #define LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_REP_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR diff --git a/libc/src/__support/fixed_point/sqrt.h b/libc/src/__support/fixed_point/sqrt.h index 236ebb2857030b..d8df294b18a1a8 100644 --- a/libc/src/__support/fixed_point/sqrt.h +++ b/libc/src/__support/fixed_point/sqrt.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_SQRT_H #define LLVM_LIBC_SRC___SUPPORT_FIXEDPOINT_SQRT_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE diff --git a/libc/src/__support/macros/properties/types.h b/libc/src/__support/macros/properties/types.h index 
e812a9dfcfd8ab..0a3c834663cc92 100644 --- a/libc/src/__support/macros/properties/types.h +++ b/libc/src/__support/macros/properties/types.h @@ -10,8 +10,8 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H #define LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H -#include "llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG -#include "llvm-libc-types/float128.h" // float128 +#include "include/llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG +#include "include/llvm-libc-types/float128.h" // float128 #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/compiler.h" #include "src/__support/macros/properties/cpu_features.h" diff --git a/libc/src/math/amdgpu/fmax.cpp b/libc/src/math/amdgpu/fmax.cpp index 09624cc6f092af..09f0f942a042a4 100644 --- a/libc/src/math/amdgpu/fmax.cpp +++ b/libc/src/math/amdgpu/fmax.cpp @@ -15,10 +15,6 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, fmax, (double x, double y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. - if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) & - cpp::bit_cast(y)); return __builtin_fmax(x, y); } diff --git a/libc/src/math/amdgpu/fmaxf.cpp b/libc/src/math/amdgpu/fmaxf.cpp index f6ed46699a049f..5913a85df63703 100644 --- a/libc/src/math/amdgpu/fmaxf.cpp +++ b/libc/src/math/amdgpu/fmaxf.cpp @@ -8,17 +8,11 @@ #include "src/math/fmaxf.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, fmaxf, (float x, float y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. 
- if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) & - cpp::bit_cast(y)); return __builtin_fmaxf(x, y); } diff --git a/libc/src/math/amdgpu/fmin.cpp b/libc/src/math/amdgpu/fmin.cpp index 8977ff7a066c6b..0d6f3521dcb705 100644 --- a/libc/src/math/amdgpu/fmin.cpp +++ b/libc/src/math/amdgpu/fmin.cpp @@ -8,17 +8,11 @@ #include "src/math/fmin.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, fmin, (double x, double y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. - if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) | - cpp::bit_cast(y)); return __builtin_fmin(x, y); } diff --git a/libc/src/math/amdgpu/fminf.cpp b/libc/src/math/amdgpu/fminf.cpp index 3be55257f61649..42744abfb3b02f 100644 --- a/libc/src/math/amdgpu/fminf.cpp +++ b/libc/src/math/amdgpu/fminf.cpp @@ -8,17 +8,11 @@ #include "src/math/fminf.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, fminf, (float x, float y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. - if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) | - cpp::bit_cast(y)); return __builtin_fminf(x, y); } diff --git a/libc/src/math/nvptx/fmax.cpp b/libc/src/math/nvptx/fmax.cpp index 09624cc6f092af..3ba65d7eccd369 100644 --- a/libc/src/math/nvptx/fmax.cpp +++ b/libc/src/math/nvptx/fmax.cpp @@ -8,17 +8,11 @@ #include "src/math/fmax.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, fmax, (double x, double y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. 
- if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) & - cpp::bit_cast(y)); return __builtin_fmax(x, y); } diff --git a/libc/src/math/nvptx/fmaxf.cpp b/libc/src/math/nvptx/fmaxf.cpp index f6ed46699a049f..e977082b39f403 100644 --- a/libc/src/math/nvptx/fmaxf.cpp +++ b/libc/src/math/nvptx/fmaxf.cpp @@ -15,10 +15,6 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, fmaxf, (float x, float y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. - if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) & - cpp::bit_cast(y)); return __builtin_fmaxf(x, y); } diff --git a/libc/src/math/nvptx/fmin.cpp b/libc/src/math/nvptx/fmin.cpp index 8977ff7a066c6b..0d6f3521dcb705 100644 --- a/libc/src/math/nvptx/fmin.cpp +++ b/libc/src/math/nvptx/fmin.cpp @@ -8,17 +8,11 @@ #include "src/math/fmin.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, fmin, (double x, double y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. - if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) | - cpp::bit_cast(y)); return __builtin_fmin(x, y); } diff --git a/libc/src/math/nvptx/fminf.cpp b/libc/src/math/nvptx/fminf.cpp index 3be55257f61649..42744abfb3b02f 100644 --- a/libc/src/math/nvptx/fminf.cpp +++ b/libc/src/math/nvptx/fminf.cpp @@ -8,17 +8,11 @@ #include "src/math/fminf.h" -#include "src/__support/CPP/bit.h" #include "src/__support/common.h" -#include "src/__support/macros/optimization.h" namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, fminf, (float x, float y)) { - // FIXME: The builtin function does not correctly handle the +/-0.0 case. 
- if (LIBC_UNLIKELY(x == y)) - return cpp::bit_cast(cpp::bit_cast(x) | - cpp::bit_cast(y)); return __builtin_fminf(x, y); } diff --git a/libc/src/stdfix/abshk.h b/libc/src/stdfix/abshk.h index 80dc73053dfb45..13c9300caab883 100644 --- a/libc/src/stdfix/abshk.h +++ b/libc/src/stdfix/abshk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ABSHK_H #define LLVM_LIBC_SRC_STDFIX_ABSHK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/abshr.h b/libc/src/stdfix/abshr.h index 035f9a6de222e8..5acd0cfc4a60db 100644 --- a/libc/src/stdfix/abshr.h +++ b/libc/src/stdfix/abshr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ABSHR_H #define LLVM_LIBC_SRC_STDFIX_ABSHR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/absk.h b/libc/src/stdfix/absk.h index 426415de28e6ae..73dfcac0ac8e7f 100644 --- a/libc/src/stdfix/absk.h +++ b/libc/src/stdfix/absk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ABSK_H #define LLVM_LIBC_SRC_STDFIX_ABSK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/abslk.h b/libc/src/stdfix/abslk.h index 21e33f856bfc65..7de116fa227932 100644 --- a/libc/src/stdfix/abslk.h +++ b/libc/src/stdfix/abslk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ABSLK_H #define LLVM_LIBC_SRC_STDFIX_ABSLK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/abslr.h b/libc/src/stdfix/abslr.h index ebca35e58aa510..bf5b585bbbb669 100644 --- a/libc/src/stdfix/abslr.h +++ b/libc/src/stdfix/abslr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ABSLR_H #define LLVM_LIBC_SRC_STDFIX_ABSLR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include 
"include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/absr.h b/libc/src/stdfix/absr.h index 2744fcb5a7eccc..b5ead7ce14e2a0 100644 --- a/libc/src/stdfix/absr.h +++ b/libc/src/stdfix/absr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ABSR_H #define LLVM_LIBC_SRC_STDFIX_ABSR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundhk.h b/libc/src/stdfix/roundhk.h index 06de5cc05cdbe4..9a5c874cc030db 100644 --- a/libc/src/stdfix/roundhk.h +++ b/libc/src/stdfix/roundhk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDHK_H #define LLVM_LIBC_SRC_STDFIX_ROUNDHK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundhr.h b/libc/src/stdfix/roundhr.h index 6729bf5b139973..ba5a67945d6c3b 100644 --- a/libc/src/stdfix/roundhr.h +++ b/libc/src/stdfix/roundhr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDHR_H #define LLVM_LIBC_SRC_STDFIX_ROUNDHR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundk.h b/libc/src/stdfix/roundk.h index 02fb9a8c9b1a86..e9fa6d8f9c3b8c 100644 --- a/libc/src/stdfix/roundk.h +++ b/libc/src/stdfix/roundk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDK_H #define LLVM_LIBC_SRC_STDFIX_ROUNDK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundlk.h b/libc/src/stdfix/roundlk.h index 28be9c00549498..5fa0e90e855a64 100644 --- a/libc/src/stdfix/roundlk.h +++ b/libc/src/stdfix/roundlk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDLK_H #define LLVM_LIBC_SRC_STDFIX_ROUNDLK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace 
LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundlr.h b/libc/src/stdfix/roundlr.h index be97a35a64204d..c015292e8f3f28 100644 --- a/libc/src/stdfix/roundlr.h +++ b/libc/src/stdfix/roundlr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDLR_H #define LLVM_LIBC_SRC_STDFIX_ROUNDLR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundr.h b/libc/src/stdfix/roundr.h index 15523f8b6c9a38..b5b1375c882e03 100644 --- a/libc/src/stdfix/roundr.h +++ b/libc/src/stdfix/roundr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDR_H #define LLVM_LIBC_SRC_STDFIX_ROUNDR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/rounduhk.h b/libc/src/stdfix/rounduhk.h index d1c4a4416d7636..85ebf2903ec7e9 100644 --- a/libc/src/stdfix/rounduhk.h +++ b/libc/src/stdfix/rounduhk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDUHK_H #define LLVM_LIBC_SRC_STDFIX_ROUNDUHK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/rounduhr.h b/libc/src/stdfix/rounduhr.h index 6cecb733dd3b18..1be0aab1f5a79d 100644 --- a/libc/src/stdfix/rounduhr.h +++ b/libc/src/stdfix/rounduhr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDUHR_H #define LLVM_LIBC_SRC_STDFIX_ROUNDUHR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/rounduk.h b/libc/src/stdfix/rounduk.h index 4511d69525c5d5..8dae89586c4901 100644 --- a/libc/src/stdfix/rounduk.h +++ b/libc/src/stdfix/rounduk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDUK_H #define LLVM_LIBC_SRC_STDFIX_ROUNDUK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff 
--git a/libc/src/stdfix/roundulk.h b/libc/src/stdfix/roundulk.h index 8bd90beeb830c5..81dfd1dceb6001 100644 --- a/libc/src/stdfix/roundulk.h +++ b/libc/src/stdfix/roundulk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDULK_H #define LLVM_LIBC_SRC_STDFIX_ROUNDULK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundulr.h b/libc/src/stdfix/roundulr.h index 65e5c27b1c8531..002fc94907c613 100644 --- a/libc/src/stdfix/roundulr.h +++ b/libc/src/stdfix/roundulr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDULR_H #define LLVM_LIBC_SRC_STDFIX_ROUNDULR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/roundur.h b/libc/src/stdfix/roundur.h index 110e578da79319..72de44b1e0c4e5 100644 --- a/libc/src/stdfix/roundur.h +++ b/libc/src/stdfix/roundur.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_ROUNDUR_H #define LLVM_LIBC_SRC_STDFIX_ROUNDUR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/sqrtuhk.h b/libc/src/stdfix/sqrtuhk.h index b57340003fa03c..80000a0079696d 100644 --- a/libc/src/stdfix/sqrtuhk.h +++ b/libc/src/stdfix/sqrtuhk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_SQRTUHK_H #define LLVM_LIBC_SRC_STDFIX_SQRTUHK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/sqrtuhr.h b/libc/src/stdfix/sqrtuhr.h index 6b629a29de3c88..fd95f0924e8d48 100644 --- a/libc/src/stdfix/sqrtuhr.h +++ b/libc/src/stdfix/sqrtuhr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_SQRTUHR_H #define LLVM_LIBC_SRC_STDFIX_SQRTUHR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git 
a/libc/src/stdfix/sqrtuk.h b/libc/src/stdfix/sqrtuk.h index 6bd7a2608716cd..04d0adadde9ad2 100644 --- a/libc/src/stdfix/sqrtuk.h +++ b/libc/src/stdfix/sqrtuk.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_SQRTUK_H #define LLVM_LIBC_SRC_STDFIX_SQRTUK_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/sqrtulr.h b/libc/src/stdfix/sqrtulr.h index d1982a6b1c0518..284adaaf35bf59 100644 --- a/libc/src/stdfix/sqrtulr.h +++ b/libc/src/stdfix/sqrtulr.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_SQRTULR_H #define LLVM_LIBC_SRC_STDFIX_SQRTULR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdfix/sqrtur.h b/libc/src/stdfix/sqrtur.h index 13f7d1e5e466ec..df9dfe5a0bf39e 100644 --- a/libc/src/stdfix/sqrtur.h +++ b/libc/src/stdfix/sqrtur.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDFIX_SQRTUR_H #define LLVM_LIBC_SRC_STDFIX_SQRTUR_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h index c89971e20686e4..de69c603be6b63 100644 --- a/libc/src/stdio/printf_core/fixed_converter.h +++ b/libc/src/stdio/printf_core/fixed_converter.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FIXED_CONVERTER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FIXED_CONVERTER_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/string_view.h" #include "src/__support/fixed_point/fx_bits.h" #include "src/__support/fixed_point/fx_rep.h" diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index 13fdbf243a22e8..0876116a0bac86 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -9,7 
+9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/optional.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/str_to_integer.h" diff --git a/libc/src/sys/epoll/epoll_pwait.h b/libc/src/sys/epoll/epoll_pwait.h index 16105850d6942e..9dcb55533009f9 100644 --- a/libc/src/sys/epoll/epoll_pwait.h +++ b/libc/src/sys/epoll/epoll_pwait.h @@ -10,8 +10,8 @@ #define LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_PWAIT_H // TODO: Use this include once the include headers are also using quotes. -// #include "llvm-libc-types/sigset_t.h" -// #include "llvm-libc-types/struct_epoll_event.h" +// #include "include/llvm-libc-types/sigset_t.h" +// #include "include/llvm-libc-types/struct_epoll_event.h" #include diff --git a/libc/src/sys/epoll/epoll_pwait2.h b/libc/src/sys/epoll/epoll_pwait2.h index f7b28d4fbc51d9..622ede6a0f9f9a 100644 --- a/libc/src/sys/epoll/epoll_pwait2.h +++ b/libc/src/sys/epoll/epoll_pwait2.h @@ -10,9 +10,9 @@ #define LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_PWAIT2_H // TODO: Use this include once the include headers are also using quotes. -// #include "llvm-libc-types/sigset_t.h" -// #include "llvm-libc-types/struct_epoll_event.h" -// #include "llvm-libc-types/struct_timespec.h" +// #include "include/llvm-libc-types/sigset_t.h" +// #include "include/llvm-libc-types/struct_epoll_event.h" +// #include "include/llvm-libc-types/struct_timespec.h" #include diff --git a/libc/src/sys/epoll/epoll_wait.h b/libc/src/sys/epoll/epoll_wait.h index 0dc487bba5bdf4..d51c9100846ce0 100644 --- a/libc/src/sys/epoll/epoll_wait.h +++ b/libc/src/sys/epoll/epoll_wait.h @@ -10,7 +10,7 @@ #define LLVM_LIBC_SRC_SYS_EPOLL_EPOLL_WAIT_H // TODO: Use this include once the include headers are also using quotes. 
-// #include "llvm-libc-types/struct_epoll_event.h" +// #include "include/llvm-libc-types/struct_epoll_event.h" #include diff --git a/libc/src/sys/epoll/linux/epoll_pwait.cpp b/libc/src/sys/epoll/linux/epoll_pwait.cpp index e0c13a7a79602f..ee1b4e66e98444 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait.cpp @@ -15,8 +15,8 @@ #include // For syscall numbers. // TODO: Use this include once the include headers are also using quotes. -// #include "llvm-libc-types/sigset_t.h" -// #include "llvm-libc-types/struct_epoll_event.h" +// #include "include/llvm-libc-types/sigset_t.h" +// #include "include/llvm-libc-types/struct_epoll_event.h" #include diff --git a/libc/src/sys/epoll/linux/epoll_pwait2.cpp b/libc/src/sys/epoll/linux/epoll_pwait2.cpp index a44b0c2a9f70f0..671dede2a1058d 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait2.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait2.cpp @@ -15,9 +15,9 @@ #include // For syscall numbers. // TODO: Use this include once the include headers are also using quotes. -// #include "llvm-libc-types/sigset_t.h" -// #include "llvm-libc-types/struct_epoll_event.h" -// #include "llvm-libc-types/struct_timespec.h" +// #include "include/llvm-libc-types/sigset_t.h" +// #include "include/llvm-libc-types/struct_epoll_event.h" +// #include "include/llvm-libc-types/struct_timespec.h" #include diff --git a/libc/src/sys/epoll/linux/epoll_wait.cpp b/libc/src/sys/epoll/linux/epoll_wait.cpp index b643e2dd720cb6..0c43edf7645454 100644 --- a/libc/src/sys/epoll/linux/epoll_wait.cpp +++ b/libc/src/sys/epoll/linux/epoll_wait.cpp @@ -14,8 +14,8 @@ #include // For syscall numbers. // TODO: Use this include once the include headers are also using quotes. 
-// #include "llvm-libc-types/sigset_t.h" -// #include "llvm-libc-types/struct_epoll_event.h" +// #include "include/llvm-libc-types/sigset_t.h" +// #include "include/llvm-libc-types/struct_epoll_event.h" #include diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 466494f038f4e3..4668f0061975f8 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -26,8 +26,7 @@ function(add_unittest_framework_library name) ${TEST_LIB_SRCS} ${TEST_LIB_HDRS} ) - target_include_directories(${lib} PUBLIC - ${LIBC_SOURCE_DIR} ${LIBC_SOURCE_DIR}/include) + target_include_directories(${lib} PUBLIC ${LIBC_SOURCE_DIR}) list(APPEND compile_options -fno-exceptions -fno-rtti) if(TARGET libc.src.time.clock) target_compile_definitions(${lib} PRIVATE TARGET_SUPPORTS_CLOCK) diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp index babd44f9b20630..7b0e4fca83683b 100644 --- a/libc/test/UnitTest/LibcTest.cpp +++ b/libc/test/UnitTest/LibcTest.cpp @@ -8,7 +8,7 @@ #include "LibcTest.h" -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" #include "src/__support/UInt128.h" diff --git a/libc/test/include/stdbit_test.cpp b/libc/test/include/stdbit_test.cpp index 84a4cde18b9f40..acb79ca0f3ff11 100644 --- a/libc/test/include/stdbit_test.cpp +++ b/libc/test/include/stdbit_test.cpp @@ -88,7 +88,7 @@ bool stdc_has_single_bit_ul(unsigned long) noexcept { return false; } bool stdc_has_single_bit_ull(unsigned long long) noexcept { return false; } } -#include "llvm-libc-macros/stdbit-macros.h" +#include "include/llvm-libc-macros/stdbit-macros.h" TEST(LlvmLibcStdbitTest, TypeGenericMacroLeadingZeros) { EXPECT_EQ(stdc_leading_zeros(static_cast(0U)), 0xAAU); diff --git a/libc/test/include/stdckdint_test.cpp b/libc/test/include/stdckdint_test.cpp index 5ac8c95f4ef26f..1180a6de9efe2e 100644 --- 
a/libc/test/include/stdckdint_test.cpp +++ b/libc/test/include/stdckdint_test.cpp @@ -8,7 +8,7 @@ #include "test/UnitTest/Test.h" -#include "llvm-libc-macros/stdckdint-macros.h" +#include "include/llvm-libc-macros/stdckdint-macros.h" TEST(LlvmLibcStdCkdIntTest, Add) { int result; diff --git a/libc/test/include/sys/queue_test.cpp b/libc/test/include/sys/queue_test.cpp index 48c0e811c61542..c10e48d627caa2 100644 --- a/libc/test/include/sys/queue_test.cpp +++ b/libc/test/include/sys/queue_test.cpp @@ -10,7 +10,7 @@ #include "src/__support/char_vector.h" #include "test/UnitTest/Test.h" -#include "llvm-libc-macros/sys-queue-macros.h" +#include "include/llvm-libc-macros/sys-queue-macros.h" using LIBC_NAMESPACE::CharVector; using LIBC_NAMESPACE::cpp::string; diff --git a/libc/test/integration/startup/CMakeLists.txt b/libc/test/integration/startup/CMakeLists.txt index 08c0d978602b80..fb5d6bc787cc26 100644 --- a/libc/test/integration/startup/CMakeLists.txt +++ b/libc/test/integration/startup/CMakeLists.txt @@ -31,7 +31,6 @@ function(add_startup_test target_name) ${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR} - ${LIBC_SOURCE_DIR}/include ${LIBC_BUILD_DIR} ${LIBC_BUILD_DIR}/include ) diff --git a/libc/test/integration/startup/gpu/rpc_interface_test.cpp b/libc/test/integration/startup/gpu/rpc_interface_test.cpp index 7bbd7085fc2f45..674e2cc1ed7499 100644 --- a/libc/test/integration/startup/gpu/rpc_interface_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_interface_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm-libc-types/test_rpc_opcodes_t.h" +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "test/IntegrationTest/test.h" diff --git a/libc/test/integration/startup/gpu/rpc_stream_test.cpp b/libc/test/integration/startup/gpu/rpc_stream_test.cpp index 9401f822904d04..09a4ae67256e3a 100644 --- 
a/libc/test/integration/startup/gpu/rpc_stream_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_stream_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm-libc-types/test_rpc_opcodes_t.h" +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "src/__support/integer_to_string.h" diff --git a/libc/test/integration/startup/gpu/rpc_test.cpp b/libc/test/integration/startup/gpu/rpc_test.cpp index bb36b6cedb63cb..4032d890c53ec8 100644 --- a/libc/test/integration/startup/gpu/rpc_test.cpp +++ b/libc/test/integration/startup/gpu/rpc_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm-libc-types/test_rpc_opcodes_t.h" +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include "src/__support/GPU/utils.h" #include "src/__support/RPC/rpc_client.h" #include "test/IntegrationTest/test.h" diff --git a/libc/test/src/__support/fixed_point/fx_bits_test.cpp b/libc/test/src/__support/fixed_point/fx_bits_test.cpp index 5670687273d5ba..58627816eb8d97 100644 --- a/libc/test/src/__support/fixed_point/fx_bits_test.cpp +++ b/libc/test/src/__support/fixed_point/fx_bits_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm-libc-macros/stdfix-macros.h" +#include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/fixed_point/fx_bits.h" #include "src/__support/integer_literals.h" diff --git a/libc/test/src/compiler/stack_chk_guard_test.cpp b/libc/test/src/compiler/stack_chk_guard_test.cpp index 427e20c2ac5046..6b71e155fa3e4d 100644 --- a/libc/test/src/compiler/stack_chk_guard_test.cpp +++ b/libc/test/src/compiler/stack_chk_guard_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include 
"llvm-libc-macros/signal-macros.h" +#include "include/llvm-libc-macros/signal-macros.h" #include "src/__support/macros/sanitizer.h" #include "src/compiler/__stack_chk_fail.h" #include "src/string/memset.h" diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt index 36bfdca1a442d6..878f81f1d573c8 100644 --- a/libc/test/src/math/differential_testing/CMakeLists.txt +++ b/libc/test/src/math/differential_testing/CMakeLists.txt @@ -47,7 +47,6 @@ function(add_diff_binary target_name) ${fq_target_name} PRIVATE ${LIBC_SOURCE_DIR} - ${LIBC_SOURCE_DIR}/include ) if(DIFF_COMPILE_OPTIONS) target_compile_options( diff --git a/libc/utils/LibcTableGenUtil/CMakeLists.txt b/libc/utils/LibcTableGenUtil/CMakeLists.txt index 60208ed790d574..dca6a7bb830655 100644 --- a/libc/utils/LibcTableGenUtil/CMakeLists.txt +++ b/libc/utils/LibcTableGenUtil/CMakeLists.txt @@ -5,5 +5,5 @@ add_llvm_library( DISABLE_LLVM_LINK_LLVM_DYLIB LINK_COMPONENTS Support TableGen ) -target_include_directories(LibcTableGenUtil PUBLIC ${LIBC_SOURCE_DIR} ${LIBC_SOURCE_DIR}/include) +target_include_directories(LibcTableGenUtil PUBLIC ${LIBC_SOURCE_DIR}) target_include_directories(LibcTableGenUtil PRIVATE ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h index d74d65e8993829..e2aabb08c11dac 100644 --- a/libc/utils/gpu/loader/Loader.h +++ b/libc/utils/gpu/loader/Loader.h @@ -11,7 +11,7 @@ #include "utils/gpu/server/llvmlibc_rpc_server.h" -#include "llvm-libc-types/test_rpc_opcodes_t.h" +#include "include/llvm-libc-types/test_rpc_opcodes_t.h" #include #include diff --git a/libcxx/include/__assert b/libcxx/include/__assert index eb862b5369b258..49769fb4d44978 100644 --- a/libcxx/include/__assert +++ b/libcxx/include/__assert @@ -34,4 +34,85 @@ # define _LIBCPP_ASSUME(expression) ((void)0) #endif +// clang-format off +// Fast hardening mode checks. 
+ +#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST + +// Enabled checks. +# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) +// Disabled checks. +// On most modern platforms, dereferencing a null pointer does not lead to an actual memory access. +# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) +// Overlapping ranges will make algorithms produce incorrect results but don't directly lead to a security +// vulnerability. +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) + +// Extensive hardening mode checks. + +#elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE + +// Enabled checks. 
+# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) +// Disabled checks. +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) + +// Debug hardening mode checks. + +#elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG + +// All checks enabled. 
+# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) + +// Disable all checks if hardening is not enabled. + +#else + +// All checks disabled. 
+# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) + +#endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST +// clang-format on + #endif // _LIBCPP___ASSERT diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h index a45a7183547726..18a864523de06f 100644 --- a/libcxx/include/__atomic/atomic_flag.h +++ b/libcxx/include/__atomic/atomic_flag.h @@ -13,6 +13,7 @@ #include <__atomic/contention_t.h> #include <__atomic/cxx_atomic_impl.h> #include <__atomic/memory_order.h> +#include <__availability> #include <__chrono/duration.h> #include <__config> #include <__thread/support.h> diff --git a/libcxx/include/__charconv/from_chars_integral.h b/libcxx/include/__charconv/from_chars_integral.h index e969cedb33cbe4..c1f033b37b913e 100644 --- a/libcxx/include/__charconv/from_chars_integral.h +++ b/libcxx/include/__charconv/from_chars_integral.h @@ -11,6 +11,7 @@ #define _LIBCPP___CHARCONV_FROM_CHARS_INTEGRAL_H #include 
<__algorithm/copy_n.h> +#include <__assert> #include <__charconv/from_chars_result.h> #include <__charconv/traits.h> #include <__config> diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h index 0dee351521f9c6..c49f4f6797aa43 100644 --- a/libcxx/include/__charconv/to_chars_base_10.h +++ b/libcxx/include/__charconv/to_chars_base_10.h @@ -11,6 +11,7 @@ #define _LIBCPP___CHARCONV_TO_CHARS_BASE_10_H #include <__algorithm/copy_n.h> +#include <__assert> #include <__charconv/tables.h> #include <__config> #include diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h index 40fbe334d8d54c..0369f4dfb9bda6 100644 --- a/libcxx/include/__charconv/to_chars_integral.h +++ b/libcxx/include/__charconv/to_chars_integral.h @@ -11,6 +11,7 @@ #define _LIBCPP___CHARCONV_TO_CHARS_INTEGRAL_H #include <__algorithm/copy_n.h> +#include <__assert> #include <__bit/countl.h> #include <__charconv/tables.h> #include <__charconv/to_chars_base_10.h> diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h index b4907c3f775715..c91c6da3247978 100644 --- a/libcxx/include/__charconv/traits.h +++ b/libcxx/include/__charconv/traits.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___CHARCONV_TRAITS #define _LIBCPP___CHARCONV_TRAITS +#include <__assert> #include <__bit/countl.h> #include <__charconv/tables.h> #include <__charconv/to_chars_base_10.h> diff --git a/libcxx/include/__config b/libcxx/include/__config index 53ff113a16b2a8..8d4d17378b2973 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -345,87 +345,6 @@ _LIBCPP_HARDENING_MODE_EXTENSIVE, \ _LIBCPP_HARDENING_MODE_DEBUG # endif -// clang-format off -// Fast hardening mode checks. - -# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST - -// Enabled checks. 
-# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) -// Disabled checks. -// On most modern platforms, dereferencing a null pointer does not lead to an actual memory access. -# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) -// Overlapping ranges will make algorithms produce incorrect results but don't directly lead to a security -// vulnerability. -# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) - -// Extensive hardening mode checks. - -# elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE - -// Enabled checks. 
-# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) -// Disabled checks. -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) - -// Debug hardening mode checks. - -# elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG - -// All checks enabled. 
-# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSERT(expression, message) -# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) - -// Disable all checks if hardening is not enabled. - -# else - -// All checks disabled. 
-# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) - -# endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST -// clang-format on - // } HARDENING # define _LIBCPP_TOSTRING2(x) #x diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index 6802a8b7bd4ca3..f01d323efff5fc 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -16,6 +16,7 @@ #include <__algorithm/min.h> #include <__algorithm/rotate.h> #include <__algorithm/transform.h> +#include <__assert> #include <__charconv/chars_format.h> #include <__charconv/to_chars_floating_point.h> #include <__charconv/to_chars_result.h> diff --git a/libcxx/include/__numeric/saturation_arithmetic.h b/libcxx/include/__numeric/saturation_arithmetic.h index 0e6f455cf22825..41596a0c58e27d 100644 --- a/libcxx/include/__numeric/saturation_arithmetic.h +++ 
b/libcxx/include/__numeric/saturation_arithmetic.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___NUMERIC_SATURATION_ARITHMETIC_H #define _LIBCPP___NUMERIC_SATURATION_ARITHMETIC_H +#include <__assert> #include <__concepts/arithmetic.h> #include <__config> #include <__utility/cmp.h> diff --git a/libcxx/include/__random/negative_binomial_distribution.h b/libcxx/include/__random/negative_binomial_distribution.h index eed4f511e87190..6d0055d01ed432 100644 --- a/libcxx/include/__random/negative_binomial_distribution.h +++ b/libcxx/include/__random/negative_binomial_distribution.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H #define _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H +#include <__assert> #include <__config> #include <__random/bernoulli_distribution.h> #include <__random/gamma_distribution.h> diff --git a/libcxx/include/__ranges/repeat_view.h b/libcxx/include/__ranges/repeat_view.h index d08f0e0d4e9f74..620a2645497285 100644 --- a/libcxx/include/__ranges/repeat_view.h +++ b/libcxx/include/__ranges/repeat_view.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___RANGES_REPEAT_VIEW_H #define _LIBCPP___RANGES_REPEAT_VIEW_H +#include <__assert> #include <__concepts/constructible.h> #include <__concepts/same_as.h> #include <__concepts/semiregular.h> diff --git a/libcxx/include/__stop_token/stop_state.h b/libcxx/include/__stop_token/stop_state.h index 462aa73952b84f..df07573f878628 100644 --- a/libcxx/include/__stop_token/stop_state.h +++ b/libcxx/include/__stop_token/stop_state.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___STOP_TOKEN_STOP_STATE_H #define _LIBCPP___STOP_TOKEN_STOP_STATE_H +#include <__assert> #include <__availability> #include <__config> #include <__stop_token/atomic_unique_lock.h> diff --git a/libcxx/include/__string/char_traits.h b/libcxx/include/__string/char_traits.h index 8ea9625d071834..5880d3a22db2e7 100644 --- a/libcxx/include/__string/char_traits.h +++ b/libcxx/include/__string/char_traits.h @@ -14,6 +14,7 @@ #include <__algorithm/find_end.h> 
#include <__algorithm/find_first_of.h> #include <__algorithm/min.h> +#include <__assert> #include <__compare/ordering.h> #include <__config> #include <__functional/hash.h> diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 70e30bc87e8128..0f62de7fa83f98 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -1793,7 +1793,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/any b/libcxx/include/any index 378dfb6e21b536..ce54803cd91b5b 100644 --- a/libcxx/include/any +++ b/libcxx/include/any @@ -80,7 +80,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> #include <__memory/allocator.h> diff --git a/libcxx/include/array b/libcxx/include/array index 41f016a4859a32..961b620efb9357 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -116,7 +116,7 @@ template const T&& get(const array&&) noexce #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/swap_ranges.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__config> #include <__fwd/array.h> #include <__iterator/reverse_iterator.h> diff --git a/libcxx/include/atomic b/libcxx/include/atomic index 2dac69377b77f0..61ff61d415dd84 100644 --- a/libcxx/include/atomic +++ b/libcxx/include/atomic @@ -587,7 +587,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__atomic/aliases.h> #include <__atomic/atomic.h> #include <__atomic/atomic_base.h> diff --git a/libcxx/include/barrier b/libcxx/include/barrier index f91452c8d0064c..c5fd84b91925b1 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -51,7 +51,7 @@ namespace std # error " is not supported since libc++ has been configured without 
support for threads." #endif -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__atomic/atomic_base.h> #include <__atomic/memory_order.h> #include <__availability> diff --git a/libcxx/include/bit b/libcxx/include/bit index 84e2080377e4fa..b8e4bdc2dfe202 100644 --- a/libcxx/include/bit +++ b/libcxx/include/bit @@ -61,7 +61,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__bit/bit_cast.h> #include <__bit/bit_ceil.h> #include <__bit/bit_floor.h> diff --git a/libcxx/include/bitset b/libcxx/include/bitset index 95f7a63b23179c..8818ab6563b570 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -129,7 +129,6 @@ template struct hash>; #include <__algorithm/count.h> #include <__algorithm/fill.h> #include <__algorithm/find.h> -#include <__assert> // all public C++ headers provide the assertion handler #include <__bit_reference> #include <__config> #include <__functional/hash.h> diff --git a/libcxx/include/cassert b/libcxx/include/cassert index 761f57dee1db57..6fec37dc637610 100644 --- a/libcxx/include/cassert +++ b/libcxx/include/cassert @@ -16,7 +16,6 @@ Macros: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> // is not provided by libc++ diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex index cf05c7a9108141..94d2c8d7d003d4 100644 --- a/libcxx/include/ccomplex +++ b/libcxx/include/ccomplex @@ -17,7 +17,6 @@ */ -#include <__assert> // all public C++ headers provide the assertion handler #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/cctype b/libcxx/include/cctype index 32be6f38e5f898..d7af7e084aa23a 100644 --- a/libcxx/include/cctype +++ b/libcxx/include/cctype @@ -34,7 +34,6 @@ int toupper(int c); } // std */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git 
a/libcxx/include/cerrno b/libcxx/include/cerrno index 937ec23c6971ad..d488fa72a54b7a 100644 --- a/libcxx/include/cerrno +++ b/libcxx/include/cerrno @@ -22,7 +22,6 @@ Macros: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/cfenv b/libcxx/include/cfenv index 16b3761ee27b16..f8cacd562f76bd 100644 --- a/libcxx/include/cfenv +++ b/libcxx/include/cfenv @@ -52,7 +52,6 @@ int feupdateenv(const fenv_t* envp); } // std */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/cfloat b/libcxx/include/cfloat index 4f991dd49ff4f8..5d1b38c557dcad 100644 --- a/libcxx/include/cfloat +++ b/libcxx/include/cfloat @@ -69,7 +69,6 @@ Macros: LDBL_TRUE_MIN // C11 */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/charconv b/libcxx/include/charconv index 5a2869acba8715..5bc7b9011be024 100644 --- a/libcxx/include/charconv +++ b/libcxx/include/charconv @@ -69,7 +69,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__charconv/chars_format.h> #include <__charconv/from_chars_integral.h> #include <__charconv/from_chars_result.h> diff --git a/libcxx/include/chrono b/libcxx/include/chrono index fe73f7c772b996..b3b260c2a998e6 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -825,7 +825,6 @@ constexpr chrono::year operator ""y(unsigned lo // clang-format on -#include <__assert> // all public C++ headers provide the assertion handler #include <__chrono/calendar.h> #include <__chrono/convert_to_timespec.h> #include <__chrono/convert_to_tm.h> diff --git a/libcxx/include/cinttypes b/libcxx/include/cinttypes index a5b9558abde12d..52663a4f35fad5 100644 --- a/libcxx/include/cinttypes +++ b/libcxx/include/cinttypes @@ -234,7 +234,6 @@ uintmax_t wcstoumax(const wchar_t* restrict 
nptr, wchar_t** restrict endptr, int } // std */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> // standard-mandated includes diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646 index e0cd722495ed0d..1d859f08fac572 100644 --- a/libcxx/include/ciso646 +++ b/libcxx/include/ciso646 @@ -15,7 +15,6 @@ */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/climits b/libcxx/include/climits index 2e8993e4d6a519..bcd8b4a56a073c 100644 --- a/libcxx/include/climits +++ b/libcxx/include/climits @@ -37,7 +37,6 @@ Macros: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/clocale b/libcxx/include/clocale index e2ace355d7b648..c689a64be288a3 100644 --- a/libcxx/include/clocale +++ b/libcxx/include/clocale @@ -34,7 +34,6 @@ lconv* localeconv(); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/cmath b/libcxx/include/cmath index 798ddb4963b0ec..dd194bbb558969 100644 --- a/libcxx/include/cmath +++ b/libcxx/include/cmath @@ -304,7 +304,6 @@ constexpr long double lerp(long double a, long double b, long double t) noexcept */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> diff --git a/libcxx/include/codecvt b/libcxx/include/codecvt index 504dd71f300405..b7182ff471559d 100644 --- a/libcxx/include/codecvt +++ b/libcxx/include/codecvt @@ -54,7 +54,6 @@ class codecvt_utf8_utf16 */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__locale> #include diff --git a/libcxx/include/compare b/libcxx/include/compare index cc0cae8a544d62..93953254b78436 100644 --- 
a/libcxx/include/compare +++ b/libcxx/include/compare @@ -140,7 +140,6 @@ namespace std { } */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__compare/common_comparison_category.h> #include <__compare/compare_partial_order_fallback.h> #include <__compare/compare_strong_order_fallback.h> diff --git a/libcxx/include/complex b/libcxx/include/complex index 0aba60e514ba22..e996485a38ae67 100644 --- a/libcxx/include/complex +++ b/libcxx/include/complex @@ -256,7 +256,6 @@ template complex tanh (const complex&); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__fwd/complex.h> #include <__tuple/tuple_element.h> diff --git a/libcxx/include/concepts b/libcxx/include/concepts index 5fdf30ecfbd3fb..e10f5ab5ad8a18 100644 --- a/libcxx/include/concepts +++ b/libcxx/include/concepts @@ -129,7 +129,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__concepts/arithmetic.h> #include <__concepts/assignable.h> #include <__concepts/boolean_testable.h> diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index 6aac3c13ef4a74..4ded1140d46b1b 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -118,7 +118,6 @@ public: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__chrono/duration.h> #include <__chrono/steady_clock.h> diff --git a/libcxx/include/coroutine b/libcxx/include/coroutine index f264570128bb80..4bd1d4e9c3103a 100644 --- a/libcxx/include/coroutine +++ b/libcxx/include/coroutine @@ -38,7 +38,6 @@ struct suspend_always; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__coroutine/coroutine_handle.h> #include <__coroutine/coroutine_traits.h> diff --git a/libcxx/include/csetjmp b/libcxx/include/csetjmp index 
9012cad22ebe74..7ba90068710aea 100644 --- a/libcxx/include/csetjmp +++ b/libcxx/include/csetjmp @@ -30,7 +30,6 @@ void longjmp(jmp_buf env, int val); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> // is not provided by libc++ diff --git a/libcxx/include/csignal b/libcxx/include/csignal index cf45f507535e1d..804a7f95ae9682 100644 --- a/libcxx/include/csignal +++ b/libcxx/include/csignal @@ -39,7 +39,6 @@ int raise(int sig); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> // is not provided by libc++ diff --git a/libcxx/include/cstdarg b/libcxx/include/cstdarg index 3a4291f4584aa1..4642eb7b5258ca 100644 --- a/libcxx/include/cstdarg +++ b/libcxx/include/cstdarg @@ -31,7 +31,6 @@ Types: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> // is not provided by libc++ diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool index ce608033a22ce1..ef731c021a4ab8 100644 --- a/libcxx/include/cstdbool +++ b/libcxx/include/cstdbool @@ -19,7 +19,6 @@ Macros: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/cstddef b/libcxx/include/cstddef index 1d7bac24c81eaa..ed16ae44fb2bf8 100644 --- a/libcxx/include/cstddef +++ b/libcxx/include/cstddef @@ -33,7 +33,6 @@ Types: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> diff --git a/libcxx/include/cstdint b/libcxx/include/cstdint index 829d9398f387a8..8c4782859426dd 100644 --- a/libcxx/include/cstdint +++ b/libcxx/include/cstdint @@ -140,7 +140,6 @@ Types: } // std */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/cstdio 
b/libcxx/include/cstdio index 0a867cec1a388b..7f94371081f8b1 100644 --- a/libcxx/include/cstdio +++ b/libcxx/include/cstdio @@ -95,7 +95,6 @@ void perror(const char* s); } // std */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/cstdlib b/libcxx/include/cstdlib index 9bf0ea3f73b169..c817fd8f4accda 100644 --- a/libcxx/include/cstdlib +++ b/libcxx/include/cstdlib @@ -81,7 +81,6 @@ void *aligned_alloc(size_t alignment, size_t size); // C11 */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/cstring b/libcxx/include/cstring index a9bdf4ff2dfca7..c2c92b02e73cc1 100644 --- a/libcxx/include/cstring +++ b/libcxx/include/cstring @@ -56,7 +56,6 @@ size_t strlen(const char* s); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/is_constant_evaluated.h> diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath index bfcf2f98d470c8..6237979be4906c 100644 --- a/libcxx/include/ctgmath +++ b/libcxx/include/ctgmath @@ -18,7 +18,6 @@ */ -#include <__assert> // all public C++ headers provide the assertion handler #include #include diff --git a/libcxx/include/ctime b/libcxx/include/ctime index b61e19d6446ddc..f47b49a43e23ef 100644 --- a/libcxx/include/ctime +++ b/libcxx/include/ctime @@ -45,7 +45,6 @@ int timespec_get( struct timespec *ts, int base); // C++17 */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> // is not provided by libc++ diff --git a/libcxx/include/cuchar b/libcxx/include/cuchar index 03b8c7d2a88bcf..f0015be275367d 100644 --- a/libcxx/include/cuchar +++ b/libcxx/include/cuchar @@ -36,7 +36,6 @@ size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git 
a/libcxx/include/cwchar b/libcxx/include/cwchar index 122af242880e04..7442438d8f447f 100644 --- a/libcxx/include/cwchar +++ b/libcxx/include/cwchar @@ -102,7 +102,6 @@ size_t wcsrtombs(char* restrict dst, const wchar_t** restrict src, size_t len, */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/apply_cv.h> #include <__type_traits/is_constant_evaluated.h> diff --git a/libcxx/include/cwctype b/libcxx/include/cwctype index 5a2d2427d8471f..04abfabef57933 100644 --- a/libcxx/include/cwctype +++ b/libcxx/include/cwctype @@ -49,7 +49,6 @@ wctrans_t wctrans(const char* property); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/deque b/libcxx/include/deque index c539a06bdd95c0..85ea9c6f661ed6 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -188,7 +188,7 @@ template #include <__algorithm/remove.h> #include <__algorithm/remove_if.h> #include <__algorithm/unwrap_iter.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__format/enable_insertable.h> diff --git a/libcxx/include/exception b/libcxx/include/exception index 97fee977690d0a..5eff8e3f8a4bfa 100644 --- a/libcxx/include/exception +++ b/libcxx/include/exception @@ -76,7 +76,6 @@ template void rethrow_if_nested(const E& e); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__exception/exception.h> #include <__exception/exception_ptr.h> diff --git a/libcxx/include/execution b/libcxx/include/execution index 56facc87379ef1..822ffa1fd3ebc4 100644 --- a/libcxx/include/execution +++ b/libcxx/include/execution @@ -32,7 +32,6 @@ namespace std { } */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/is_execution_policy.h> 
#include <__type_traits/is_same.h> diff --git a/libcxx/include/expected b/libcxx/include/expected index 44d0ce6b00c81e..f455ab7d5d61c6 100644 --- a/libcxx/include/expected +++ b/libcxx/include/expected @@ -38,7 +38,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__expected/bad_expected_access.h> #include <__expected/expected.h> diff --git a/libcxx/include/experimental/__simd/scalar.h b/libcxx/include/experimental/__simd/scalar.h index 717fd6cd92d710..aff2cd11cfcfaa 100644 --- a/libcxx/include/experimental/__simd/scalar.h +++ b/libcxx/include/experimental/__simd/scalar.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_SCALAR_H #define _LIBCPP_EXPERIMENTAL___SIMD_SCALAR_H +#include <__assert> #include #include #include diff --git a/libcxx/include/experimental/__simd/vec_ext.h b/libcxx/include/experimental/__simd/vec_ext.h index 7883132ba6c0db..c9423df93cfacc 100644 --- a/libcxx/include/experimental/__simd/vec_ext.h +++ b/libcxx/include/experimental/__simd/vec_ext.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP_EXPERIMENTAL___SIMD_VEC_EXT_H #define _LIBCPP_EXPERIMENTAL___SIMD_VEC_EXT_H +#include <__assert> #include <__bit/bit_ceil.h> #include <__utility/forward.h> #include <__utility/integer_sequence.h> diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index e9c1fb6924eced..de82da2d3d72bd 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -52,7 +52,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__memory/addressof.h> #include <__type_traits/decay.h> #include <__utility/forward.h> diff --git a/libcxx/include/experimental/propagate_const b/libcxx/include/experimental/propagate_const index 06d7ba43daf1ca..8c2ceb9def3357 100644 --- a/libcxx/include/experimental/propagate_const +++ b/libcxx/include/experimental/propagate_const @@ -107,7 +107,6 @@ */ 
-#include <__assert> // all public C++ headers provide the assertion handler #include <__functional/operations.h> #include <__fwd/hash.h> #include <__type_traits/conditional.h> diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd index adca9faa47bb06..fad6431d13a193 100644 --- a/libcxx/include/experimental/simd +++ b/libcxx/include/experimental/simd @@ -71,8 +71,6 @@ inline namespace parallelism_v2 { */ -#include <__assert> // all public C++ headers provide the assertion handler - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/experimental/type_traits b/libcxx/include/experimental/type_traits index 62f9574ec58f4f..37be434f8edd56 100644 --- a/libcxx/include/experimental/type_traits +++ b/libcxx/include/experimental/type_traits @@ -68,7 +68,6 @@ inline namespace fundamentals_v1 { */ -#include <__assert> // all public C++ headers provide the assertion handler #include #if _LIBCPP_STD_VER >= 14 diff --git a/libcxx/include/experimental/utility b/libcxx/include/experimental/utility index c1bd9364fd51e4..8bd0a055b7783f 100644 --- a/libcxx/include/experimental/utility +++ b/libcxx/include/experimental/utility @@ -30,7 +30,6 @@ inline namespace fundamentals_v1 { */ -#include <__assert> // all public C++ headers provide the assertion handler #include #include diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 7ac268d5dcbdec..7b5b31c4081788 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -201,7 +201,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__hash_table> #include diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 79f0925f6f4c67..1ab259b59979f3 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -192,7 +192,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler 
#include <__config> #include <__hash_table> #include diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index ec68354a9fc933..b344ed468082e8 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -533,7 +533,6 @@ inline constexpr bool std::ranges::enable_view // all public C++ headers provide the assertion handler #include <__config> #include <__filesystem/copy_options.h> #include <__filesystem/directory_entry.h> diff --git a/libcxx/include/format b/libcxx/include/format index 64f6ba1d25284a..b2fe0053b974bb 100644 --- a/libcxx/include/format +++ b/libcxx/include/format @@ -188,7 +188,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__format/buffer.h> #include <__format/concepts.h> diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index ffa390f42a1072..a62b171a46783b 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -199,7 +199,6 @@ template #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/min.h> -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> #include <__iterator/distance.h> diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 203cc6dfb4b134..513c8dc2b127a4 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -187,7 +187,7 @@ typedef basic_fstream wfstream; */ #include <__algorithm/max.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__fwd/fstream.h> diff --git a/libcxx/include/functional b/libcxx/include/functional index fd99e11fb18180..a2774a48bda0ee 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -513,7 +513,6 @@ POLICY: For non-variadic implementations, the number of arguments is 
limited */ #include <__algorithm/search.h> -#include <__assert> // all public C++ headers provide the assertion handler #include <__compare/compare_three_way.h> #include <__config> #include <__functional/binary_function.h> diff --git a/libcxx/include/future b/libcxx/include/future index 13828680f03335..fda1591818a667 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -368,7 +368,7 @@ template struct uses_allocator, Alloc>; # error " is not supported since libc++ has been configured without support for threads." #endif -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__chrono/duration.h> #include <__chrono/time_point.h> diff --git a/libcxx/include/initializer_list b/libcxx/include/initializer_list index 4c2a7925a57bbf..680ca1cd20d550 100644 --- a/libcxx/include/initializer_list +++ b/libcxx/include/initializer_list @@ -42,7 +42,6 @@ template const E* end(initializer_list il) noexcept; // constexpr in */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/iomanip b/libcxx/include/iomanip index 867408affd22b6..fb4f15b9a58533 100644 --- a/libcxx/include/iomanip +++ b/libcxx/include/iomanip @@ -42,7 +42,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include #include diff --git a/libcxx/include/ios b/libcxx/include/ios index 8465860d08dc14..4b1306fc2ad830 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -217,7 +217,6 @@ storage-class-specifier const error_category& iostream_category() noexcept; # error "The iostreams library is not supported since libc++ has been configured without support for localization." 
#endif -#include <__assert> // all public C++ headers provide the assertion handler #include <__fwd/ios.h> #include <__ios/fpos.h> #include <__locale> diff --git a/libcxx/include/iosfwd b/libcxx/include/iosfwd index e28998d004156d..1579fa12754daf 100644 --- a/libcxx/include/iosfwd +++ b/libcxx/include/iosfwd @@ -106,7 +106,6 @@ using wosyncstream = basic_osyncstream; // C++20 */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__fwd/fstream.h> #include <__fwd/ios.h> diff --git a/libcxx/include/iostream b/libcxx/include/iostream index 568ce8caed6ef1..5df45c6d3f78e7 100644 --- a/libcxx/include/iostream +++ b/libcxx/include/iostream @@ -33,7 +33,6 @@ extern wostream wclog; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/istream b/libcxx/include/istream index 7975a9e599a5b6..3f20c355046cea 100644 --- a/libcxx/include/istream +++ b/libcxx/include/istream @@ -158,7 +158,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__fwd/istream.h> #include <__iterator/istreambuf_iterator.h> diff --git a/libcxx/include/iterator b/libcxx/include/iterator index 2f9280742370a2..5779bf828711b8 100644 --- a/libcxx/include/iterator +++ b/libcxx/include/iterator @@ -674,7 +674,6 @@ template constexpr const E* data(initializer_list il) noexcept; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__iterator/access.h> #include <__iterator/advance.h> diff --git a/libcxx/include/latch b/libcxx/include/latch index dd389d296f5c11..3fe201b63d1385 100644 --- a/libcxx/include/latch +++ b/libcxx/include/latch @@ -46,7 +46,7 @@ namespace std # error " is not supported since libc++ has been configured without support for threads." 
#endif -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__atomic/atomic_base.h> #include <__atomic/atomic_sync.h> #include <__atomic/memory_order.h> diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp index 22fbea99b848bb..eeeae39ca101d9 100644 --- a/libcxx/include/libcxx.imp +++ b/libcxx/include/libcxx.imp @@ -425,17 +425,17 @@ { include: [ "<__fwd/bit_reference.h>", "private", "", "public" ] }, { include: [ "<__fwd/bit_reference.h>", "private", "", "public" ] }, { include: [ "<__fwd/complex.h>", "private", "", "public" ] }, - { include: [ "<__fwd/fstream.h>", "private", "", "public" ] }, + { include: [ "<__fwd/fstream.h>", "private", "", "public" ] }, { include: [ "<__fwd/hash.h>", "private", "", "public" ] }, - { include: [ "<__fwd/ios.h>", "private", "", "public" ] }, - { include: [ "<__fwd/istream.h>", "private", "", "public" ] }, + { include: [ "<__fwd/ios.h>", "private", "", "public" ] }, + { include: [ "<__fwd/istream.h>", "private", "", "public" ] }, { include: [ "<__fwd/mdspan.h>", "private", "", "public" ] }, { include: [ "<__fwd/memory_resource.h>", "private", "", "public" ] }, - { include: [ "<__fwd/ostream.h>", "private", "", "public" ] }, + { include: [ "<__fwd/ostream.h>", "private", "", "public" ] }, { include: [ "<__fwd/pair.h>", "private", "", "public" ] }, { include: [ "<__fwd/span.h>", "private", "", "public" ] }, - { include: [ "<__fwd/sstream.h>", "private", "", "public" ] }, - { include: [ "<__fwd/streambuf.h>", "private", "", "public" ] }, + { include: [ "<__fwd/sstream.h>", "private", "", "public" ] }, + { include: [ "<__fwd/streambuf.h>", "private", "", "public" ] }, { include: [ "<__fwd/string.h>", "private", "", "public" ] }, { include: [ "<__fwd/string_view.h>", "private", "", "public" ] }, { include: [ "<__fwd/subrange.h>", "private", "", "public" ] }, diff --git a/libcxx/include/limits b/libcxx/include/limits index c704b4dddaf8e2..f15b5b1ab1d52f 100644 --- 
a/libcxx/include/limits +++ b/libcxx/include/limits @@ -102,7 +102,6 @@ template<> class numeric_limits; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_signed.h> diff --git a/libcxx/include/list b/libcxx/include/list index 2705d4c9914d80..8f0689268e2a5a 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -202,7 +202,7 @@ template #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> #include <__algorithm/min.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__format/enable_insertable.h> diff --git a/libcxx/include/locale b/libcxx/include/locale index 9e97eb9f339533..e3c63e3abe130e 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -193,7 +193,7 @@ template class messages_byname; #include <__algorithm/max.h> #include <__algorithm/reverse.h> #include <__algorithm/unwrap_iter.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__config> #include <__iterator/access.h> #include <__iterator/back_insert_iterator.h> diff --git a/libcxx/include/map b/libcxx/include/map index a56584589f5c85..5b6ec9d3a21936 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -574,7 +574,7 @@ erase_if(multimap& c, Predicate pred); // C++20 #include <__algorithm/equal.h> #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__functional/binary_function.h> diff --git a/libcxx/include/memory b/libcxx/include/memory index 0ada7cdfa20690..a8c0264eb9eb78 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -917,7 
+917,6 @@ template // clang-format on -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__memory/addressof.h> #include <__memory/align.h> diff --git a/libcxx/include/mutex b/libcxx/include/mutex index e67135fc0ec04e..ea56e3051908a7 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -186,7 +186,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__chrono/steady_clock.h> #include <__chrono/time_point.h> #include <__condition_variable/condition_variable.h> diff --git a/libcxx/include/new b/libcxx/include/new index 86fbcb524b66d8..988f7a84422c84 100644 --- a/libcxx/include/new +++ b/libcxx/include/new @@ -86,13 +86,13 @@ void operator delete[](void* ptr, void*) noexcept; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> #include <__exception/exception.h> #include <__type_traits/is_function.h> #include <__type_traits/is_same.h> #include <__type_traits/remove_cv.h> +#include <__verbose_abort> #include #include diff --git a/libcxx/include/numbers b/libcxx/include/numbers index 0d834c6b863f66..f48ba4baf38ffd 100644 --- a/libcxx/include/numbers +++ b/libcxx/include/numbers @@ -58,7 +58,6 @@ namespace std::numbers { } */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__concepts/arithmetic.h> #include <__config> #include diff --git a/libcxx/include/numeric b/libcxx/include/numeric index 0fe7115f1c666e..8b429fa2f7e7d5 100644 --- a/libcxx/include/numeric +++ b/libcxx/include/numeric @@ -156,7 +156,6 @@ constexpr T saturate_cast(U x) noexcept; // freestanding, Sin */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include diff --git a/libcxx/include/optional b/libcxx/include/optional index 73da0a8a5a7c19..9e4f0fff2f4a7a 100644 --- a/libcxx/include/optional +++ b/libcxx/include/optional @@ -177,7 
+177,7 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__compare/compare_three_way_result.h> #include <__compare/three_way_comparable.h> diff --git a/libcxx/include/ostream b/libcxx/include/ostream index 2e2607340a5de1..42819ceb252c65 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -171,7 +171,6 @@ void vprint_nonunicode(ostream& os, string_view fmt, format_args args); */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> #include <__exception/operations.h> diff --git a/libcxx/include/print b/libcxx/include/print index 543a540ee4f27d..a9f10433a7dc61 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -31,7 +31,7 @@ namespace std { } */ -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__concepts/same_as.h> #include <__config> diff --git a/libcxx/include/queue b/libcxx/include/queue index 2263f71fde9073..521a465713cd22 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -258,7 +258,6 @@ template #include <__algorithm/pop_heap.h> #include <__algorithm/push_heap.h> #include <__algorithm/ranges_copy.h> -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__functional/operations.h> #include <__iterator/back_insert_iterator.h> diff --git a/libcxx/include/random b/libcxx/include/random index 02d71ad6dd25c8..9edd6c4608ec26 100644 --- a/libcxx/include/random +++ b/libcxx/include/random @@ -1677,7 +1677,6 @@ class piecewise_linear_distribution } // std */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__random/bernoulli_distribution.h> #include <__random/binomial_distribution.h> diff --git a/libcxx/include/ranges b/libcxx/include/ranges index 
660d533b2a7830..167d2137eaf454 100644 --- a/libcxx/include/ranges +++ b/libcxx/include/ranges @@ -375,7 +375,6 @@ namespace std { } */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__ranges/access.h> #include <__ranges/all.h> diff --git a/libcxx/include/ratio b/libcxx/include/ratio index de656f38e01de6..b989c272aaee6a 100644 --- a/libcxx/include/ratio +++ b/libcxx/include/ratio @@ -81,7 +81,6 @@ using quetta = ratio <1'000'000'000'000'000'000'000'000'000'000, 1>; // Since C+ } */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__type_traits/integral_constant.h> #include diff --git a/libcxx/include/regex b/libcxx/include/regex index 48af5b8b57fd64..dc3db93744b489 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -791,7 +791,7 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__algorithm/find.h> #include <__algorithm/search.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__iterator/back_insert_iterator.h> diff --git a/libcxx/include/scoped_allocator b/libcxx/include/scoped_allocator index fa6c6c5d20d864..c53261025be9d7 100644 --- a/libcxx/include/scoped_allocator +++ b/libcxx/include/scoped_allocator @@ -109,7 +109,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__memory/allocator_traits.h> #include <__memory/uses_allocator_construction.h> diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index 448b5fbd8c58cf..2dfdae9aa148c1 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -51,7 +51,7 @@ using binary_semaphore = counting_semaphore<1>; # error " is not supported since libc++ has been configured without support for threads." 
#endif -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__atomic/atomic_base.h> #include <__atomic/atomic_sync.h> #include <__atomic/memory_order.h> diff --git a/libcxx/include/set b/libcxx/include/set index 7f8245f8b605ab..e2e87e4cdcfe3b 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -515,7 +515,7 @@ erase_if(multiset& c, Predicate pred); // C++20 #include <__algorithm/equal.h> #include <__algorithm/lexicographical_compare.h> #include <__algorithm/lexicographical_compare_three_way.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__functional/is_transparent.h> diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index 57f385b5435eb2..38b559e8930fc5 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -128,7 +128,6 @@ template # error " is not supported since libc++ has been configured without support for threads." 
#endif -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__chrono/duration.h> #include <__chrono/steady_clock.h> diff --git a/libcxx/include/span b/libcxx/include/span index 32364b4270be9e..9efaac517fc8f6 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -128,7 +128,7 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__config> #include <__fwd/span.h> #include <__iterator/bounded_iter.h> diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 8862e2ef99f8da..60bec52209d75e 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -278,7 +278,6 @@ typedef basic_stringstream wstringstream; // clang-format on -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> #include <__fwd/sstream.h> diff --git a/libcxx/include/stack b/libcxx/include/stack index 77f1a4e11b732d..4003792600a004 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -114,7 +114,6 @@ template */ #include <__algorithm/ranges_copy.h> -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__iterator/back_insert_iterator.h> #include <__iterator/iterator_traits.h> diff --git a/libcxx/include/stdexcept b/libcxx/include/stdexcept index 3016c130a91b8f..4e4cd22a6a64d2 100644 --- a/libcxx/include/stdexcept +++ b/libcxx/include/stdexcept @@ -41,7 +41,6 @@ public: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__exception/exception.h> #include <__fwd/string.h> diff --git a/libcxx/include/stop_token b/libcxx/include/stop_token index 66c7a6ab5996c1..fee195f9d63d4b 100644 --- a/libcxx/include/stop_token +++ b/libcxx/include/stop_token @@ -37,7 +37,6 @@ namespace std { # error " is not supported since libc++ has been configured without support for threads." 
#endif -#include <__assert> // all public C++ headers provide the assertion handler #include <__stop_token/stop_callback.h> #include <__stop_token/stop_source.h> #include <__stop_token/stop_token.h> diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf index aad7686a435cbc..aec537866c2031 100644 --- a/libcxx/include/streambuf +++ b/libcxx/include/streambuf @@ -107,7 +107,6 @@ protected: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__fwd/streambuf.h> #include <__type_traits/is_same.h> diff --git a/libcxx/include/string b/libcxx/include/string index 530a2233860434..ca5b3fa6a01472 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -572,7 +572,7 @@ basic_string operator""s( const char32_t *str, size_t len ); #include <__algorithm/min.h> #include <__algorithm/remove.h> #include <__algorithm/remove_if.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__config> #include <__format/enable_insertable.h> #include <__functional/hash.h> diff --git a/libcxx/include/string_view b/libcxx/include/string_view index e414507a7933b6..48bbcd80021670 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -206,7 +206,7 @@ namespace std { // clang-format on #include <__algorithm/min.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__config> #include <__functional/hash.h> #include <__functional/unary_function.h> diff --git a/libcxx/include/strstream b/libcxx/include/strstream index e20c86baa6dfc5..e9f533644f78cf 100644 --- a/libcxx/include/strstream +++ b/libcxx/include/strstream @@ -129,7 +129,6 @@ private: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include #include diff --git a/libcxx/include/system_error b/libcxx/include/system_error index a60c98492aaced..eeab347788a9a5 100644 --- 
a/libcxx/include/system_error +++ b/libcxx/include/system_error @@ -144,7 +144,6 @@ template <> struct hash; */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__system_error/errc.h> #include <__system_error/error_category.h> diff --git a/libcxx/include/thread b/libcxx/include/thread index 29c7e86785cde4..ed70bde76094ae 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -92,7 +92,6 @@ void sleep_for(const chrono::duration& rel_time); # error " is not supported since libc++ has been configured without support for threads." #endif -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__thread/formatter.h> #include <__thread/jthread.h> diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 96cf3be85b76f2..0101d64aea4a72 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -205,7 +205,6 @@ template // clang-format on -#include <__assert> // all public C++ headers provide the assertion handler #include <__compare/common_comparison_category.h> #include <__compare/synth_three_way.h> #include <__config> diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 466aeb6e0ddd71..0037c426560e6f 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -416,7 +416,7 @@ namespace std } */ -#include <__assert> // all public C++ headers provide the assertion handler + #include <__config> #include <__fwd/hash.h> // This is https://llvm.org/PR56938 #include <__type_traits/add_const.h> diff --git a/libcxx/include/typeindex b/libcxx/include/typeindex index e6ea12afd52450..6398aa40d616a7 100644 --- a/libcxx/include/typeindex +++ b/libcxx/include/typeindex @@ -45,7 +45,6 @@ struct hash */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__functional/unary_function.h> #include diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo index 
1144b5b12913e1..dafc7b89248eca 100644 --- a/libcxx/include/typeinfo +++ b/libcxx/include/typeinfo @@ -56,7 +56,6 @@ public: */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> #include <__exception/exception.h> diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index d2a3b769821d84..ca3d1a80bd578d 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -584,7 +584,7 @@ template */ #include <__algorithm/is_permutation.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__functional/is_transparent.h> diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 50b616907f0052..64a02de3cf55d4 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -532,7 +532,7 @@ template // clang-format on #include <__algorithm/is_permutation.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__config> #include <__functional/is_transparent.h> diff --git a/libcxx/include/utility b/libcxx/include/utility index 1deef3db204107..90713da621c5da 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -246,7 +246,6 @@ template */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__config> #include <__utility/as_const.h> #include <__utility/as_lvalue.h> diff --git a/libcxx/include/valarray b/libcxx/include/valarray index 88b161eccd332f..3d45925a25bef8 100644 --- a/libcxx/include/valarray +++ b/libcxx/include/valarray @@ -350,7 +350,7 @@ template unspecified2 end(const valarray& v); #include <__algorithm/min.h> #include <__algorithm/min_element.h> #include <__algorithm/unwrap_iter.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__config> 
#include <__functional/operations.h> #include <__memory/addressof.h> diff --git a/libcxx/include/variant b/libcxx/include/variant index 6063739e52c86b..5ce99250a8b4f4 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -210,7 +210,6 @@ namespace std { */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__compare/common_comparison_category.h> #include <__compare/compare_three_way_result.h> diff --git a/libcxx/include/vector b/libcxx/include/vector index 579fadfb404c19..89cbdf0b3ff747 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -315,7 +315,7 @@ template requires is-vector-bool-reference // Since C++ #include <__algorithm/remove_if.h> #include <__algorithm/rotate.h> #include <__algorithm/unwrap_iter.h> -#include <__assert> // all public C++ headers provide the assertion handler +#include <__assert> #include <__availability> #include <__bit_reference> #include <__concepts/same_as.h> diff --git a/libcxx/include/version b/libcxx/include/version index b18927a2bc38c2..cd180441c5b9e1 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -244,7 +244,6 @@ __cpp_lib_within_lifetime 202306L */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> diff --git a/libcxx/test/libcxx/assertions/customize_verbose_abort.link-time.pass.cpp b/libcxx/test/libcxx/assertions/customize_verbose_abort.link-time.pass.cpp index 585ab73f2cb261..9298a1e365fca4 100644 --- a/libcxx/test/libcxx/assertions/customize_verbose_abort.link-time.pass.cpp +++ b/libcxx/test/libcxx/assertions/customize_verbose_abort.link-time.pass.cpp @@ -12,6 +12,7 @@ // failures when back-deploying. // XFAIL: availability-verbose_abort-missing +#include <__verbose_abort> #include void std::__libcpp_verbose_abort(char const*, ...) 
{ diff --git a/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py b/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py deleted file mode 100644 index bd883aa0c14502..00000000000000 --- a/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py +++ /dev/null @@ -1,33 +0,0 @@ -#===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===----------------------------------------------------------------------===## - -# Test that all public C++ headers define the verbose termination function, which -# is required for users to be able to include any public header and then override -# the function using a strong definition. - -# RUN: %{python} %s %{libcxx-dir}/utils - -import sys -sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers - -for header in public_headers: - # Skip C compatibility headers. 
- if header.endswith('.h'): - continue - - BLOCKLIT = '' # block Lit from interpreting a RUN/XFAIL/etc inside the generation script - print(f"""\ -//--- {header}.compile.pass.cpp -{lit_header_restrictions.get(header, '')} - -// XFAIL{BLOCKLIT}: availability-verbose_abort-missing - -#include <{header}> -using HandlerType = decltype(std::__libcpp_verbose_abort); -""") diff --git a/libcxx/test/libcxx/assertions/modes/none.pass.cpp b/libcxx/test/libcxx/assertions/modes/none.pass.cpp index 4644c5692e70be..8332848c1a8e03 100644 --- a/libcxx/test/libcxx/assertions/modes/none.pass.cpp +++ b/libcxx/test/libcxx/assertions/modes/none.pass.cpp @@ -11,6 +11,7 @@ // REQUIRES: libcpp-hardening-mode=none +#include <__assert> #include bool executed_condition = false; diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b688a30cdb792d..7b6d35d9a7fc53 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -1514,7 +1514,6 @@ def produce_version_header(): */ -#include <__assert> // all public C++ headers provide the assertion handler #include <__availability> #include <__config> diff --git a/libcxx/utils/generate_iwyu_mapping.py b/libcxx/utils/generate_iwyu_mapping.py index 0a650250e747f6..6eb2c6095bf1e7 100644 --- a/libcxx/utils/generate_iwyu_mapping.py +++ b/libcxx/utils/generate_iwyu_mapping.py @@ -40,6 +40,8 @@ def IWYU_mapping(header: str) -> typing.Optional[typing.List[str]]: return ["utility"] elif header == "__fwd/subrange.h": return ["ranges"] + elif re.match("__fwd/(fstream|ios|istream|ostream|sstream|streambuf)[.]h", header): + return ["iosfwd"] # Handle remaining forward declaration headers elif re.match("__fwd/(.+)[.]h", header): return [re.match("__fwd/(.+)[.]h", header).group(1)] diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index 328e883ddbe5a6..fdc4ee0c05d755 100644 --- 
a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -383,7 +383,7 @@ endfunction() function(lldb_find_python_module module) set(MODULE_FOUND PY_${module}_FOUND) - if (DEFINED ${MODULE_FOUND}) + if (${MODULE_FOUND}) return() endif() @@ -392,10 +392,10 @@ function(lldb_find_python_module module) ERROR_QUIET) if (status) - set(${MODULE_FOUND} OFF CACHE BOOL "Failed to find python module '${module}'") + set(${MODULE_FOUND} OFF PARENT_SCOPE) message(STATUS "Could NOT find Python module '${module}'") else() - set(${MODULE_FOUND} ON CACHE BOOL "Found python module '${module}'") + set(${MODULE_FOUND} ON PARENT_SCOPE) message(STATUS "Found Python module '${module}'") endif() endfunction() diff --git a/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py b/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py index d7560156e0571a..15d9feb543895a 100644 --- a/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py +++ b/lldb/test/API/macosx/nslog/TestDarwinNSLogOutput.py @@ -56,8 +56,9 @@ def run_lldb_to_breakpoint(self, exe, source_file, line, settings_commands=None) # So that the child gets torn down after the test. import pexpect - self.child = pexpect.spawnu( - "%s %s %s" % (lldbtest_config.lldbExec, self.lldbOption, exe) + self.child = pexpect.spawn( + "%s %s %s" % (lldbtest_config.lldbExec, self.lldbOption, exe), + encoding="utf-8", ) child = self.child diff --git a/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py b/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py index e9b5940ff1adaf..31b960859fa2e5 100644 --- a/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py +++ b/lldb/test/API/terminal/TestSTTYBeforeAndAfter.py @@ -37,7 +37,7 @@ def test_stty_dash_a_before_and_afetr_invoking_lldb_command(self): lldb_prompt = "(lldb) " # So that the child gets torn down after the test. 
- self.child = pexpect.spawnu("expect") + self.child = pexpect.spawn("expect", encoding="utf-8") child = self.child child.expect(expect_prompt) diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index d8cbb24b6c9b81..2a9877c721e3b4 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -12,7 +12,8 @@ endif() if(LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS) message(STATUS "Enforcing strict test requirements for LLDB") set(useful_python_modules - psutil # Lit uses psutil to do per-test timeouts. + psutil # Lit uses psutil to do per-test timeouts. + pexpect # We no longer vendor pexpect. ) foreach(module ${useful_python_modules}) lldb_find_python_module(${module}) @@ -30,6 +31,7 @@ endif() # LLDB tree. However, we delay the deletion of it from the tree in case # users/buildbots don't have the package yet and need some time to install it. if (NOT LLDB_TEST_USE_VENDOR_PACKAGES) + unset(PY_pexpect_FOUND CACHE) lldb_find_python_module(pexpect) if (NOT PY_pexpect_FOUND) message(FATAL_ERROR diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 637c2c71b02411..6afaea3f3fc5c6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -908,6 +908,18 @@ class LegalizeRuleSet { LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize)); } + /// Widen the scalar or vector element type to the next power of two that is + /// at least MinSize. No effect if the size is already such a power of two. 
+ LegalizeRuleSet &widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, + unsigned MinSize = 0) { + using namespace LegalityPredicates; + return actionIf( + LegalizeAction::WidenScalar, + any(scalarOrEltNarrowerThan(TypeIdx, MinSize), + scalarOrEltSizeNotPow2(typeIdx(TypeIdx))), + LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize)); + } + LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) { using namespace LegalityPredicates; return actionIf(LegalizeAction::NarrowScalar, isScalar(typeIdx(TypeIdx)), diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index c803f598512016..808e59be788512 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -17,7 +17,7 @@ /* The number of commits in the linear history from the * start of the universe up to the latest llvm main commit * that has been merged */ -#define LLVM_MAIN_REVISION 491120 +#define LLVM_MAIN_REVISION 491175 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 4ee51cd192ed7c..0e81d3b391a083 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -468,9 +468,7 @@ class BinaryOperator : public Instruction { static BinaryOperator *CreateNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore); static BinaryOperator *CreateNeg(Value *Op, const Twine &Name = "", - Instruction *InsertBefore = nullptr); - static BinaryOperator *CreateNeg(Value *Op, const Twine &Name, - BasicBlock *InsertAtEnd); + BasicBlock *InsertAtEnd = nullptr); static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name, BasicBlock::iterator InsertBefore); static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name = "", @@ -1538,10 +1536,19 @@ class CallBase : public Instruction { OperandBundleDef OB, Instruction *InsertPt = nullptr); + /// 
Create a clone of \p CB with operand bundle \p OB added. + static CallBase *addOperandBundle(CallBase *CB, uint32_t ID, + OperandBundleDef OB, + BasicBlock::iterator InsertPt); + /// Create a clone of \p CB with operand bundle \p ID removed. static CallBase *removeOperandBundle(CallBase *CB, uint32_t ID, Instruction *InsertPt = nullptr); + /// Create a clone of \p CB with operand bundle \p ID removed. + static CallBase *removeOperandBundle(CallBase *CB, uint32_t ID, + BasicBlock::iterator InsertPt); + static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Call || I->getOpcode() == Instruction::Invoke || diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index c192d4b84417c9..b44d1c6d3d2f06 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -24,4 +24,11 @@ def int_dx_dot : Intrinsic<[LLVMVectorElementType<0>], [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], [IntrNoMem, IntrWillReturn, Commutative] >; + +def int_dx_frac : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + +def int_dx_lerp : + Intrinsic<[LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], + [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>,LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], + [IntrNoMem, IntrWillReturn] >; } diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 93e9ed46642dfc..404424beb01931 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -658,8 +658,8 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_SSBS}))}, {"neoverse-n2", ARMV9A, (AArch64::ExtensionBitset( - {AArch64::AEK_BF16, AArch64::AEK_DOTPROD, - AArch64::AEK_FP16, AArch64::AEK_I8MM, AArch64::AEK_MTE, + {AArch64::AEK_BF16, AArch64::AEK_DOTPROD, 
AArch64::AEK_FP16, + AArch64::AEK_FP16FML, AArch64::AEK_I8MM, AArch64::AEK_MTE, AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_SVE, AArch64::AEK_SVE2, AArch64::AEK_SVE2BITPERM}))}, {"neoverse-512tvb", ARMV8_4A, diff --git a/llvm/include/llvm/TargetParser/ARMTargetParser.def b/llvm/include/llvm/TargetParser/ARMTargetParser.def index 1797a1b238d349..f0ddaa1459e567 100644 --- a/llvm/include/llvm/TargetParser/ARMTargetParser.def +++ b/llvm/include/llvm/TargetParser/ARMTargetParser.def @@ -346,8 +346,8 @@ ARM_CPU_NAME("cortex-x1c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("neoverse-n2", ARMV9A, FK_NEON_FP_ARMV8, false, - (ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_I8MM | ARM::AEK_RAS | - ARM::AEK_SB)) + (ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_FP16FML | + ARM::AEK_I8MM | ARM::AEK_RAS | ARM::AEK_SB )) ARM_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 8079f853aef855..1d016e684c48f6 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2495,6 +2495,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_OR: case TargetOpcode::G_XOR: case TargetOpcode::G_SUB: + case TargetOpcode::G_SHUFFLE_VECTOR: // Perform operation at larger width (any extension is fines here, high bits // don't affect the result) and then truncate the result back to the // original type. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98f4ec3871642e..91d86e173365b5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3645,32 +3645,42 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } } } - } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { - // If this is a ZEXTLoad and we are looking at the loaded value. - EVT VT = LD->getMemoryVT(); - unsigned MemBits = VT.getScalarSizeInBits(); - Known.Zero.setBitsFrom(MemBits); - } else if (const MDNode *Ranges = LD->getRanges()) { - EVT VT = LD->getValueType(0); - - // TODO: Handle for extending loads - if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + } else if (Op.getResNo() == 0) { + KnownBits Known0(!LD->getMemoryVT().isScalableVT() + ? LD->getMemoryVT().getFixedSizeInBits() + : BitWidth); + EVT VT = Op.getValueType(); + // Fill in any known bits from range information. There are 3 types being + // used. The result's VT (same vector elt size as BitWidth), the loaded + // MemoryVT (which may or may not be vector) and the range VT's original + // type. The range metadata needs the full range (i.e. + // MemoryVT().getSizeInBits()), which is truncated to the correct elt size + // if it is known. These are then extended to the original VT sizes below. + if (const MDNode *MD = LD->getRanges()) { + computeKnownBitsFromRangeMetadata(*MD, Known0); if (VT.isVector()) { // Handle truncation to the first demanded element. // TODO: Figure out which demanded elements are covered if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) break; + Known0 = Known0.trunc(BitWidth); + } + } - // Handle the case where a load has a vector type, but scalar memory - // with an attached range. 
- EVT MemVT = LD->getMemoryVT(); - KnownBits KnownFull(MemVT.getSizeInBits()); + if (LD->getMemoryVT().isVector()) + Known0 = Known0.trunc(LD->getMemoryVT().getScalarSizeInBits()); - computeKnownBitsFromRangeMetadata(*Ranges, KnownFull); - Known = KnownFull.trunc(BitWidth); - } else - computeKnownBitsFromRangeMetadata(*Ranges, Known); - } + // Extend the Known bits from memory to the size of the result. + if (ISD::isZEXTLoad(Op.getNode())) + Known = Known0.zext(BitWidth); + else if (ISD::isSEXTLoad(Op.getNode())) + Known = Known0.sext(BitWidth); + else if (ISD::isEXTLoad(Op.getNode())) + Known = Known0.anyext(BitWidth); + else + Known = Known0; + assert(Known.getBitWidth() == BitWidth); + return Known; } break; } diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index e4954c78be4129..3686677b825875 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -100,8 +100,8 @@ static void findDbgIntrinsics(SmallVectorImpl &Result, Value *V, SmallPtrSet EncounteredDPValues; /// Append IntrinsicT users of MetadataAsValue(MD). 
- auto AppendUsers = [&Ctx, &EncounteredIntrinsics, &Result, - DPValues](Metadata *MD) { + auto AppendUsers = [&Ctx, &EncounteredIntrinsics, &EncounteredDPValues, + &Result, DPValues](Metadata *MD) { if (auto *MDV = MetadataAsValue::getIfExists(Ctx, MD)) { for (User *U : MDV->users()) if (IntrinsicT *DVI = dyn_cast(U)) @@ -114,7 +114,8 @@ static void findDbgIntrinsics(SmallVectorImpl &Result, Value *V, if (LocalAsMetadata *L = dyn_cast(MD)) { for (DPValue *DPV : L->getAllDPValueUsers()) { if (Type == DPValue::LocationType::Any || DPV->getType() == Type) - DPValues->push_back(DPV); + if (EncounteredDPValues.insert(DPV).second) + DPValues->push_back(DPV); } } }; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index c54f8d7aca4a96..ce221758ef798b 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -46,11 +46,11 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd) - : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) { + : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) { - // append this instruction into the basic block - assert(InsertAtEnd && "Basic block to append to may not be NULL!"); - insertInto(InsertAtEnd, InsertAtEnd->end()); + // If requested, append this instruction into the basic block. 
+ if (InsertAtEnd) + insertInto(InsertAtEnd, InsertAtEnd->end()); } Instruction::~Instruction() { @@ -73,7 +73,6 @@ Instruction::~Instruction() { setMetadata(LLVMContext::MD_DIAssignID, nullptr); } - void Instruction::setParent(BasicBlock *P) { Parent = P; } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 25778570ebf34a..42cdcad78228f6 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -303,6 +303,20 @@ void LandingPadInst::addClause(Constant *Val) { // CallBase Implementation //===----------------------------------------------------------------------===// +CallBase *CallBase::Create(CallBase *CB, ArrayRef Bundles, + BasicBlock::iterator InsertPt) { + switch (CB->getOpcode()) { + case Instruction::Call: + return CallInst::Create(cast(CB), Bundles, InsertPt); + case Instruction::Invoke: + return InvokeInst::Create(cast(CB), Bundles, InsertPt); + case Instruction::CallBr: + return CallBrInst::Create(cast(CB), Bundles, InsertPt); + default: + llvm_unreachable("Unknown CallBase sub-class!"); + } +} + CallBase *CallBase::Create(CallBase *CB, ArrayRef Bundles, Instruction *InsertPt) { switch (CB->getOpcode()) { @@ -557,6 +571,18 @@ CallBase::BundleOpInfo &CallBase::getBundleOpInfoForOperand(unsigned OpIdx) { return *Current; } +CallBase *CallBase::addOperandBundle(CallBase *CB, uint32_t ID, + OperandBundleDef OB, + BasicBlock::iterator InsertPt) { + if (CB->getOperandBundle(ID)) + return CB; + + SmallVector Bundles; + CB->getOperandBundlesAsDefs(Bundles); + Bundles.push_back(OB); + return Create(CB, Bundles, InsertPt); +} + CallBase *CallBase::addOperandBundle(CallBase *CB, uint32_t ID, OperandBundleDef OB, Instruction *InsertPt) { @@ -569,6 +595,23 @@ CallBase *CallBase::addOperandBundle(CallBase *CB, uint32_t ID, return Create(CB, Bundles, InsertPt); } +CallBase *CallBase::removeOperandBundle(CallBase *CB, uint32_t ID, + BasicBlock::iterator InsertPt) { + SmallVector Bundles; + bool CreateNew = false; + + for 
(unsigned I = 0, E = CB->getNumOperandBundles(); I != E; ++I) { + auto Bundle = CB->getOperandBundleAt(I); + if (Bundle.getTagID() == ID) { + CreateNew = true; + continue; + } + Bundles.emplace_back(Bundle); + } + + return CreateNew ? Create(CB, Bundles, InsertPt) : CB; +} + CallBase *CallBase::removeOperandBundle(CallBase *CB, uint32_t ID, Instruction *InsertPt) { SmallVector Bundles; @@ -716,6 +759,13 @@ void CallInst::init(FunctionType *FTy, Value *Func, const Twine &NameStr) { setName(NameStr); } +CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name, + BasicBlock::iterator InsertBefore) + : CallBase(Ty->getReturnType(), Instruction::Call, + OperandTraits::op_end(this) - 1, 1, InsertBefore) { + init(Ty, Func, Name); +} + CallInst::CallInst(FunctionType *Ty, Value *Func, const Twine &Name, Instruction *InsertBefore) : CallBase(Ty->getReturnType(), Instruction::Call, @@ -880,6 +930,20 @@ InvokeInst::InvokeInst(const InvokeInst &II) SubclassOptionalData = II.SubclassOptionalData; } +InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, + BasicBlock::iterator InsertPt) { + std::vector Args(II->arg_begin(), II->arg_end()); + + auto *NewII = InvokeInst::Create( + II->getFunctionType(), II->getCalledOperand(), II->getNormalDest(), + II->getUnwindDest(), Args, OpB, II->getName(), InsertPt); + NewII->setCallingConv(II->getCallingConv()); + NewII->SubclassOptionalData = II->SubclassOptionalData; + NewII->setAttributes(II->getAttributes()); + NewII->setDebugLoc(II->getDebugLoc()); + return NewII; +} + InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, Instruction *InsertPt) { std::vector Args(II->arg_begin(), II->arg_end()); @@ -953,6 +1017,21 @@ CallBrInst::CallBrInst(const CallBrInst &CBI) NumIndirectDests = CBI.NumIndirectDests; } +CallBrInst *CallBrInst::Create(CallBrInst *CBI, ArrayRef OpB, + BasicBlock::iterator InsertPt) { + std::vector Args(CBI->arg_begin(), CBI->arg_end()); + + auto *NewCBI = CallBrInst::Create( + 
CBI->getFunctionType(), CBI->getCalledOperand(), CBI->getDefaultDest(), + CBI->getIndirectDests(), Args, OpB, CBI->getName(), InsertPt); + NewCBI->setCallingConv(CBI->getCallingConv()); + NewCBI->SubclassOptionalData = CBI->SubclassOptionalData; + NewCBI->setAttributes(CBI->getAttributes()); + NewCBI->setDebugLoc(CBI->getDebugLoc()); + NewCBI->NumIndirectDests = CBI->NumIndirectDests; + return NewCBI; +} + CallBrInst *CallBrInst::Create(CallBrInst *CBI, ArrayRef OpB, Instruction *InsertPt) { std::vector Args(CBI->arg_begin(), CBI->arg_end()); @@ -1135,6 +1214,18 @@ CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, // CatchSwitchInst Implementation //===----------------------------------------------------------------------===// +CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumReservedValues, + const Twine &NameStr, + BasicBlock::iterator InsertBefore) + : Instruction(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0, + InsertBefore) { + if (UnwindDest) + ++NumReservedValues; + init(ParentPad, UnwindDest, NumReservedValues + 1); + setName(NameStr); +} + CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReservedValues, const Twine &NameStr, @@ -3222,6 +3313,14 @@ void BinaryOperator::AssertOK() { #endif } +BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2, + const Twine &Name, + BasicBlock::iterator InsertBefore) { + assert(S1->getType() == S2->getType() && + "Cannot create binary operator with two operands of differing type!"); + return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore); +} + BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore) { @@ -3245,14 +3344,6 @@ BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name, InsertBefore); } -BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name, - Instruction *InsertBefore) { - 
Value *Zero = ConstantInt::get(Op->getType(), 0); - return new BinaryOperator(Instruction::Sub, - Zero, Op, - Op->getType(), Name, InsertBefore); -} - BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name, BasicBlock *InsertAtEnd) { Value *Zero = ConstantInt::get(Op->getType(), 0); @@ -3285,6 +3376,13 @@ BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name, return BinaryOperator::CreateNUWSub(Zero, Op, Name, InsertAtEnd); } +BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, + BasicBlock::iterator InsertBefore) { + Constant *C = Constant::getAllOnesValue(Op->getType()); + return new BinaryOperator(Instruction::Xor, Op, C, + Op->getType(), Name, InsertBefore); +} + BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, Instruction *InsertBefore) { Constant *C = Constant::getAllOnesValue(Op->getType()); @@ -3829,6 +3927,17 @@ CastInst *CastInst::CreatePointerBitCastOrAddrSpaceCast( return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } +CastInst *CastInst::CreateBitOrPointerCast(Value *S, Type *Ty, + const Twine &Name, + BasicBlock::iterator InsertBefore) { + if (S->getType()->isPointerTy() && Ty->isIntegerTy()) + return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); + if (S->getType()->isIntegerTy() && Ty->isPointerTy()) + return Create(Instruction::IntToPtr, S, Ty, Name, InsertBefore); + + return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); +} + CastInst *CastInst::CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore) { @@ -4463,6 +4572,18 @@ CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, setName(Name); } +CmpInst * +CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, + const Twine &Name, BasicBlock::iterator InsertBefore) { + if (Op == Instruction::ICmp) { + return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate), + S1, S2, Name); + } + + return new 
FCmpInst(InsertBefore, CmpInst::Predicate(predicate), + S1, S2, Name); +} + CmpInst * CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore) { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 6e049534529f03..43bcfdad6286f6 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5078,7 +5078,9 @@ void Verifier::visitInstruction(Instruction &I) { } else if (GlobalValue *GV = dyn_cast(I.getOperand(i))) { Check(GV->getParent() == &M, "Referencing global in another module!", &I, &M, GV, GV->getParent()); - } else if (isa(I.getOperand(i))) { + } else if (Instruction *OpInst = dyn_cast(I.getOperand(i))) { + Check(OpInst->getFunction() == BB->getParent(), + "Referring to an instruction in another function!", &I); verifyDominatesUse(I, i); } else if (isa(I.getOperand(i))) { Check(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i), diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index c8de2c0625aa26..acee228a0d0462 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -850,7 +850,7 @@ createTemporaryFile(const Twine &Model, int &ResultFD, "Model must be a simple filename."); // Use P.begin() so that createUniqueEntity doesn't need to recreate Storage. 
return createUniqueEntity(P.begin(), ResultFD, ResultPath, true, Type, Flags, - owner_read | owner_write); + all_read | all_write); } static std::error_code diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 169b00e5ebc989..b837066554f3c6 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -1514,7 +1514,7 @@ def ProcessorFeatures { FeatureFPARMv8, FeatureFullFP16, FeatureNEON, FeatureRCPC, FeatureSPE, FeatureSSBS, FeaturePerfMon]; - list NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE, + list NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE, FeatureFP16FML, FeatureMatMulInt8, FeatureMTE, FeatureSVE2, FeatureSVE2BitPerm, FeatureTRBE, FeaturePerfMon]; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c21bc3a4abbc00..b1677df56e1bea 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7518,6 +7518,22 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, (AArch64::GPR32RegClass.contains(MO.getReg()) || AArch64::GPR64RegClass.contains(MO.getReg()))) MI.removeOperand(I); + + // Add an implicit use of 'VG' for ADDXri/SUBXri, which are instructions that + // have nothing to do with VG, were it not that they are used to materialise a + // frame-address. If they contain a frame-index to a scalable vector, this + // will likely require an ADDVL instruction to materialise the address, thus + // reading VG. 
+ const MachineFunction &MF = *MI.getMF(); + if (MF.getInfo()->hasStreamingModeChanges() && + (MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::SUBXri)) { + const MachineOperand &MO = MI.getOperand(1); + if (MO.isFI() && MF.getFrameInfo().getStackID(MO.getIndex()) == + TargetStackID::ScalableVector) + MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false, + /*IsImplicit=*/true)); + } } SDValue AArch64TargetLowering::changeStreamingMode( diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 10ad5b1f8f2580..7f8856db6c6e61 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -2809,6 +2809,7 @@ class AddSubImmShift lsl #0, '01' => lsl #12 let Inst{21-10} = imm{11-0}; let DecoderMethod = "DecodeAddSubImmShift"; + let hasPostISelHook = 1; } class BaseAddSubRegPseudo(); - - // If the function contains changes to streaming mode, then there - // is a danger that rematerialised instructions end up between - // instruction sequences (e.g. call sequences, or prolog/epilogue) - // where the streaming-SVE mode is temporarily changed. - if (AFI.hasStreamingModeChanges()) { - // Avoid rematerializing rematerializable instructions that use/define - // scalable values, such as 'pfalse' or 'ptrue', which result in different - // results when the runtime vector length is different. 
- const MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (any_of(MI.operands(), [&MRI, &MFI](const MachineOperand &MO) { - if (MO.isFI() && - MFI.getStackID(MO.getIndex()) == TargetStackID::ScalableVector) - return true; - if (!MO.isReg()) - return false; - - if (MO.getReg().isVirtual()) { - const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg()); - return AArch64::ZPRRegClass.hasSubClassEq(RC) || - AArch64::PPRRegClass.hasSubClassEq(RC); - } - return AArch64::ZPRRegClass.contains(MO.getReg()) || - AArch64::PPRRegClass.contains(MO.getReg()); - })) - return false; - - // Avoid rematerializing instructions that return a value that is - // different depending on vector length, even when it is not returned - // in a scalable vector/predicate register. - switch (MI.getOpcode()) { - default: - break; - case AArch64::RDVLI_XI: - case AArch64::ADDVL_XXI: - case AArch64::ADDPL_XXI: - case AArch64::CNTB_XPiI: - case AArch64::CNTH_XPiI: - case AArch64::CNTW_XPiI: - case AArch64::CNTD_XPiI: - return false; - } - } - - return TargetInstrInfo::isReallyTriviallyReMaterializable(MI); -} - MachineBasicBlock::iterator AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 63e0cb80d8586f..6c6689091ead4d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -381,8 +381,6 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { int64_t &ByteSized, int64_t &VGSized); - bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; - // Return true if address of the form BaseReg + Scale * ScaledReg + Offset can // be used for a load/store of NumBytes. BaseReg is always present and // implicit. 
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index b919c116445c8b..531f21f9c043a2 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -443,6 +443,9 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { Reserved.set(SubReg); } + // VG cannot be allocated + Reserved.set(AArch64::VG); + if (MF.getSubtarget().hasSME2()) { for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true); SubReg.isValid(); ++SubReg) diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index acf067f2cc5a9d..2907ba74ff8108 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -233,6 +233,8 @@ def MSRpstatePseudo : (ins svcr_op:$pstatefield, timm0_1:$imm, GPR64:$rtpstate, timm0_1:$expected_pstate, variable_ops), []>, Sched<[WriteSys]> { let hasPostISelHook = 1; + let Uses = [VG]; + let Defs = [VG]; } def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 GPR64:$rtpstate), (i64 timm0_1:$expected_pstate)), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 91323e456a5ef8..84b4ecb7d2700a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -956,6 +956,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }, changeTo(1, 0)) .moreElementsToNextPow2(0) + .widenScalarOrEltToNextPow2OrMinSize(0, 8) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v4s32, v4s32) .clampNumElements(0, v2s64, v2s64) .moreElementsIf( diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 44d9a8ac7cb677..33cb5f9734b819 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td 
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -223,6 +223,8 @@ def MSRpstatesvcrImm1 let Inst{8} = imm; let Inst{7-5} = 0b011; // op2 let hasPostISelHook = 1; + let Uses = [VG]; + let Defs = [VG]; } def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 789ec817d3d8b8..c8ca1832ec18dd 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -365,6 +365,7 @@ class sve_int_ptrue sz8_64, bits<3> opc, string asm, PPRRegOp pprty, let ElementSize = pprty.ElementSize; let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_ptrue opc, string asm, SDPatternOperator op> { @@ -755,6 +756,7 @@ class sve_int_pfalse opc, string asm> let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_pfalse opc, string asm> { @@ -1090,6 +1092,7 @@ class sve_int_count opc, string asm> let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_count opc, string asm, SDPatternOperator op> { @@ -1982,6 +1985,7 @@ class sve_int_dup_mask_imm let DecoderMethod = "DecodeSVELogicalImmInstruction"; let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_dup_mask_imm { @@ -2862,6 +2866,7 @@ class sve_int_arith_vl let Inst{4-0} = Rd; let hasSideEffects = 0; + let Uses = [VG]; } class sve_int_read_vl_a opc2, string asm, bit streaming_sve = 0b0> @@ -2882,6 +2887,7 @@ class sve_int_read_vl_a opc2, string asm, bit streaming_sve = 0b let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } //===----------------------------------------------------------------------===// @@ -4699,6 +4705,7 @@ class sve_int_dup_imm sz8_64, string asm, let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_dup_imm { @@ -4741,6 +4748,7 @@ class sve_int_dup_fpimm sz8_64, 
Operand fpimmtype, let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_dup_fpimm { @@ -5657,6 +5665,7 @@ class sve_int_index_ii sz8_64, string asm, ZPRRegOp zprty, let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } multiclass sve_int_index_ii { @@ -9308,6 +9317,7 @@ class sve2p1_ptrue_pn sz, PNRP8to15RegOp pnrty, SDPatte let hasSideEffects = 0; let isReMaterializable = 1; + let Uses = [VG]; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp index 4f65a95de82ac8..a0c6bf7cc31c0a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp @@ -177,7 +177,16 @@ void DivergenceLoweringHelper::buildMergeLaneMasks( B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg}); } -void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; } +// GlobalISel has to constrain S1 incoming taken as-is with lane mask register +// class. Insert a copy of Incoming.Reg to new lane mask inside Incoming.Block, +// Incoming.Reg becomes that new lane mask. +void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { + B.setInsertPt(*In.Block, In.Block->getFirstTerminator()); + + auto Copy = B.buildCopy(LLT::scalar(1), In.Reg); + MRI->setRegClass(Copy.getReg(0), ST->getBoolRC()); + In.Reg = Copy.getReg(0); +} } // End anonymous namespace. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index aacc3590a5dbf9..b2c65e61b0097c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -34,12 +34,6 @@ using namespace llvm; using namespace MIPatternMatch; -static cl::opt AllowRiskySelect( - "amdgpu-global-isel-risky-select", - cl::desc("Allow GlobalISel to select cases that are likely to not work yet"), - cl::init(false), - cl::ReallyHidden); - #define GET_GLOBALISEL_IMPL #define AMDGPUSubtarget GCNSubtarget #include "AMDGPUGenGlobalISel.inc" @@ -211,14 +205,12 @@ bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { const Register DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI->getType(DefReg); - if (DefTy == LLT::scalar(1)) { - if (!AllowRiskySelect) { - LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n"); - return false; - } - - LLVM_DEBUG(dbgs() << "Selecting risky boolean phi\n"); - } + // S1 G_PHIs should not be selected in instruction-select, instead: + // - divergent S1 G_PHI should go through lane mask merging algorithm + // and be fully inst-selected in AMDGPUGlobalISelDivergenceLowering + // - uniform S1 G_PHI should be lowered into S32 G_PHI in AMDGPURegBankSelect + if (DefTy == LLT::scalar(1)) + return false; // TODO: Verify this doesn't have insane operands (i.e. 
VGPR to SGPR copy) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 7d4d619788d392..4b74f3b81e5e78 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -2509,7 +2509,7 @@ class MUBUF_Real_Atomic_gfx12_impl op, string ps_name, multiclass MUBUF_Real_Atomic_gfx11_Renamed_impl op, bit is_return, string real_name> { - defvar Rtn = !if(!eq(is_return, 1), "_RTN", ""); + defvar Rtn = !if(is_return, "_RTN", ""); def _BOTHEN#Rtn#_gfx11 : MUBUF_Real_Atomic_gfx11_impl, AtomicNoRet; @@ -2526,7 +2526,7 @@ multiclass MUBUF_Real_Atomic_gfx11_Renamed_impl op, bit is_return, multiclass MUBUF_Real_Atomic_gfx12_Renamed_impl op, bit is_return, string real_name> { - defvar Rtn = !if(!eq(is_return, 1), "_RTN", ""); + defvar Rtn = !if(is_return, "_RTN", ""); def _BOTHEN#Rtn#_gfx12 : MUBUF_Real_Atomic_gfx12_impl, AtomicNoRet; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index e6d27c2e64690d..7d79b9bba243cf 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1262,7 +1262,9 @@ defm DS_PK_ADD_RTN_BF16 : DS_Real_gfx12<0x0ab>; // New aliases added in GFX12 without renaming the instructions. def : MnemonicAlias<"ds_subrev_u32", "ds_rsub_u32">, Requires<[isGFX12Plus]>; +def : MnemonicAlias<"ds_subrev_rtn_u32", "ds_rsub_rtn_u32">, Requires<[isGFX12Plus]>; def : MnemonicAlias<"ds_subrev_u64", "ds_rsub_u64">, Requires<[isGFX12Plus]>; +def : MnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">, Requires<[isGFX12Plus]>; //===----------------------------------------------------------------------===// // GFX11. 
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 877781568307dc..b62e1a032631fd 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -1682,6 +1682,7 @@ def : ProcNoItin<"neoverse-n1", [ARMv82a, def : ProcNoItin<"neoverse-n2", [ARMv9a, FeatureBF16, + FeatureFP16FML, FeatureMatMulInt8]>; def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 8a3454c89542ce..67ef7986622092 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -12,139 +12,224 @@ //===----------------------------------------------------------------------===// include "llvm/IR/Intrinsics.td" -include "llvm/IR/Attributes.td" -// Abstract representation of the class a DXIL Operation belongs to. -class DXILOpClass { - string Name = name; +class DXILOpClass; + +// Following is a set of DXIL Operation classes whose names appear to be +// arbitrary, yet need to be a substring of the function name used during +// lowering to DXIL Operation calls. These class name strings are specified +// as the third argument of add_dixil_op in utils/hct/hctdb.py and case converted +// in utils/hct/hctdb_instrhelp.py of DirectXShaderCompiler repo. The function +// name has the format "dx.op..". 
+ +defset list OpClasses = { + def acceptHitAndEndSearch : DXILOpClass; + def allocateNodeOutputRecords : DXILOpClass; + def allocateRayQuery : DXILOpClass; + def annotateHandle : DXILOpClass; + def annotateNodeHandle : DXILOpClass; + def annotateNodeRecordHandle : DXILOpClass; + def atomicBinOp : DXILOpClass; + def atomicCompareExchange : DXILOpClass; + def attributeAtVertex : DXILOpClass; + def barrier : DXILOpClass; + def barrierByMemoryHandle : DXILOpClass; + def barrierByMemoryType : DXILOpClass; + def barrierByNodeRecordHandle : DXILOpClass; + def binary : DXILOpClass; + def binaryWithCarryOrBorrow : DXILOpClass; + def binaryWithTwoOuts : DXILOpClass; + def bitcastF16toI16 : DXILOpClass; + def bitcastF32toI32 : DXILOpClass; + def bitcastF64toI64 : DXILOpClass; + def bitcastI16toF16 : DXILOpClass; + def bitcastI32toF32 : DXILOpClass; + def bitcastI64toF64 : DXILOpClass; + def bufferLoad : DXILOpClass; + def bufferStore : DXILOpClass; + def bufferUpdateCounter : DXILOpClass; + def calculateLOD : DXILOpClass; + def callShader : DXILOpClass; + def cbufferLoad : DXILOpClass; + def cbufferLoadLegacy : DXILOpClass; + def checkAccessFullyMapped : DXILOpClass; + def coverage : DXILOpClass; + def createHandle : DXILOpClass; + def createHandleForLib : DXILOpClass; + def createHandleFromBinding : DXILOpClass; + def createHandleFromHeap : DXILOpClass; + def createNodeInputRecordHandle : DXILOpClass; + def createNodeOutputHandle : DXILOpClass; + def cutStream : DXILOpClass; + def cycleCounterLegacy : DXILOpClass; + def discard : DXILOpClass; + def dispatchMesh : DXILOpClass; + def dispatchRaysDimensions : DXILOpClass; + def dispatchRaysIndex : DXILOpClass; + def domainLocation : DXILOpClass; + def dot2 : DXILOpClass; + def dot2AddHalf : DXILOpClass; + def dot3 : DXILOpClass; + def dot4 : DXILOpClass; + def dot4AddPacked : DXILOpClass; + def emitIndices : DXILOpClass; + def emitStream : DXILOpClass; + def emitThenCutStream : DXILOpClass; + def evalCentroid : DXILOpClass; + 
def evalSampleIndex : DXILOpClass; + def evalSnapped : DXILOpClass; + def finishedCrossGroupSharing : DXILOpClass; + def flattenedThreadIdInGroup : DXILOpClass; + def geometryIndex : DXILOpClass; + def getDimensions : DXILOpClass; + def getInputRecordCount : DXILOpClass; + def getMeshPayload : DXILOpClass; + def getNodeRecordPtr : DXILOpClass; + def getRemainingRecursionLevels : DXILOpClass; + def groupId : DXILOpClass; + def gsInstanceID : DXILOpClass; + def hitKind : DXILOpClass; + def ignoreHit : DXILOpClass; + def incrementOutputCount : DXILOpClass; + def indexNodeHandle : DXILOpClass; + def innerCoverage : DXILOpClass; + def instanceID : DXILOpClass; + def instanceIndex : DXILOpClass; + def isHelperLane : DXILOpClass; + def isSpecialFloat : DXILOpClass; + def legacyDoubleToFloat : DXILOpClass; + def legacyDoubleToSInt32 : DXILOpClass; + def legacyDoubleToUInt32 : DXILOpClass; + def legacyF16ToF32 : DXILOpClass; + def legacyF32ToF16 : DXILOpClass; + def loadInput : DXILOpClass; + def loadOutputControlPoint : DXILOpClass; + def loadPatchConstant : DXILOpClass; + def makeDouble : DXILOpClass; + def minPrecXRegLoad : DXILOpClass; + def minPrecXRegStore : DXILOpClass; + def nodeOutputIsValid : DXILOpClass; + def objectRayDirection : DXILOpClass; + def objectRayOrigin : DXILOpClass; + def objectToWorld : DXILOpClass; + def outputComplete : DXILOpClass; + def outputControlPointID : DXILOpClass; + def pack4x8 : DXILOpClass; + def primitiveID : DXILOpClass; + def primitiveIndex : DXILOpClass; + def quadOp : DXILOpClass; + def quadReadLaneAt : DXILOpClass; + def quadVote : DXILOpClass; + def quaternary : DXILOpClass; + def rawBufferLoad : DXILOpClass; + def rawBufferStore : DXILOpClass; + def rayFlags : DXILOpClass; + def rayQuery_Abort : DXILOpClass; + def rayQuery_CommitNonOpaqueTriangleHit : DXILOpClass; + def rayQuery_CommitProceduralPrimitiveHit : DXILOpClass; + def rayQuery_Proceed : DXILOpClass; + def rayQuery_StateMatrix : DXILOpClass; + def rayQuery_StateScalar 
: DXILOpClass; + def rayQuery_StateVector : DXILOpClass; + def rayQuery_TraceRayInline : DXILOpClass; + def rayTCurrent : DXILOpClass; + def rayTMin : DXILOpClass; + def renderTargetGetSampleCount : DXILOpClass; + def renderTargetGetSamplePosition : DXILOpClass; + def reportHit : DXILOpClass; + def sample : DXILOpClass; + def sampleBias : DXILOpClass; + def sampleCmp : DXILOpClass; + def sampleCmpBias : DXILOpClass; + def sampleCmpGrad : DXILOpClass; + def sampleCmpLevel : DXILOpClass; + def sampleCmpLevelZero : DXILOpClass; + def sampleGrad : DXILOpClass; + def sampleIndex : DXILOpClass; + def sampleLevel : DXILOpClass; + def setMeshOutputCounts : DXILOpClass; + def splitDouble : DXILOpClass; + def startInstanceLocation : DXILOpClass; + def startVertexLocation : DXILOpClass; + def storeOutput : DXILOpClass; + def storePatchConstant : DXILOpClass; + def storePrimitiveOutput : DXILOpClass; + def storeVertexOutput : DXILOpClass; + def tempRegLoad : DXILOpClass; + def tempRegStore : DXILOpClass; + def tertiary : DXILOpClass; + def texture2DMSGetSamplePosition : DXILOpClass; + def textureGather : DXILOpClass; + def textureGatherCmp : DXILOpClass; + def textureGatherRaw : DXILOpClass; + def textureLoad : DXILOpClass; + def textureStore : DXILOpClass; + def textureStoreSample : DXILOpClass; + def threadId : DXILOpClass; + def threadIdInGroup : DXILOpClass; + def traceRay : DXILOpClass; + def unary : DXILOpClass; + def unaryBits : DXILOpClass; + def unpack4x8 : DXILOpClass; + def viewID : DXILOpClass; + def waveActiveAllEqual : DXILOpClass; + def waveActiveBallot : DXILOpClass; + def waveActiveBit : DXILOpClass; + def waveActiveOp : DXILOpClass; + def waveAllOp : DXILOpClass; + def waveAllTrue : DXILOpClass; + def waveAnyTrue : DXILOpClass; + def waveGetLaneCount : DXILOpClass; + def waveGetLaneIndex : DXILOpClass; + def waveIsFirstLane : DXILOpClass; + def waveMatch : DXILOpClass; + def waveMatrix_Accumulate : DXILOpClass; + def waveMatrix_Annotate : DXILOpClass; + def 
waveMatrix_Depth : DXILOpClass; + def waveMatrix_Fill : DXILOpClass; + def waveMatrix_LoadGroupShared : DXILOpClass; + def waveMatrix_LoadRawBuf : DXILOpClass; + def waveMatrix_Multiply : DXILOpClass; + def waveMatrix_ScalarOp : DXILOpClass; + def waveMatrix_StoreGroupShared : DXILOpClass; + def waveMatrix_StoreRawBuf : DXILOpClass; + def waveMultiPrefixBitCount : DXILOpClass; + def waveMultiPrefixOp : DXILOpClass; + def wavePrefixOp : DXILOpClass; + def waveReadLaneAt : DXILOpClass; + def waveReadLaneFirst : DXILOpClass; + def worldRayDirection : DXILOpClass; + def worldRayOrigin : DXILOpClass; + def worldToObject : DXILOpClass; + def writeSamplerFeedback : DXILOpClass; + def writeSamplerFeedbackBias : DXILOpClass; + def writeSamplerFeedbackGrad : DXILOpClass; + def writeSamplerFeedbackLevel: DXILOpClass; } -// Abstract representation of the category a DXIL Operation belongs to -class DXILOpCategory { - string Name = name; +// Abstraction DXIL Operation to LLVM intrinsic +class DXILOpMapping { + int OpCode = opCode; // Opcode corresponding to DXIL Operation + DXILOpClass OpClass = opClass; // Class of DXIL Operation. + Intrinsic LLVMIntrinsic = intrinsic; // LLVM Intrinsic the DXIL Operation maps + string Doc = doc; // to a short description of the operation } -def UnaryClass : DXILOpClass<"Unary">; -def BinaryClass : DXILOpClass<"Binary">; -def FlattenedThreadIdInGroupClass : DXILOpClass<"FlattenedThreadIdInGroup">; -def ThreadIdInGroupClass : DXILOpClass<"ThreadIdInGroup">; -def ThreadIdClass : DXILOpClass<"ThreadId">; -def GroupIdClass : DXILOpClass<"GroupId">; - -def BinaryUintCategory : DXILOpCategory<"Binary uint">; -def UnaryFloatCategory : DXILOpCategory<"Unary float">; -def ComputeIDCategory : DXILOpCategory<"Compute/Mesh/Amplification shader">; - -// Represent as any pointer type with an option to change to a qualified pointer -// type with address space specified. 
-def dxil_handle_ty : LLVMAnyPointerType; -def dxil_cbuffer_ty : LLVMAnyPointerType; -def dxil_resource_ty : LLVMAnyPointerType; - -// The parameter description for a DXIL operation -class DXILOpParameter { - int Pos = pos; // Position in parameter list - LLVMType ParamType = type; // Parameter type - string Name = name; // Short, unique parameter name - string Doc = doc; // Description of this parameter - bit IsConstant = isConstant; // Whether this parameter requires a constant value in the IR - string EnumName = enumName; // Name of the enum type, if applicable - int MaxValue = maxValue; // Maximum value for this parameter, if applicable -} - -// A representation for a DXIL operation -class DXILOperationDesc { - string OpName = ""; // Name of DXIL operation - int OpCode = 0; // Unique non-negative integer associated with the operation - DXILOpClass OpClass; // Class of the operation - DXILOpCategory OpCategory; // Category of the operation - string Doc = ""; // Description of the operation - list Params = []; // Parameter list of the operation - list OverloadTypes = []; // Overload types, if applicable - EnumAttr Attribute; // Operation Attribute. Leverage attributes defined in Attributes.td - // ReadNone - operation does not access memory. - // ReadOnly - only reads from memory. - // "ReadMemory" - reads memory - bit IsDerivative = 0; // Whether this is some kind of derivative - bit IsGradient = 0; // Whether this requires a gradient calculation - bit IsFeedback = 0; // Whether this is a sampler feedback operation - bit IsWave = 0; // Whether this requires in-wave, cross-lane functionality - bit NeedsUniformInputs = 0; // Whether this operation requires that all - // of its inputs are uniform across the wave - // Group DXIL operation for stats - e.g., to accumulate the number of atomic/float/uint/int/... - // operations used in the program. 
- list StatsGroup = []; -} - -class DXILOperation oloadTypes, EnumAttr attrs, list params, - list statsGroup = []> : DXILOperationDesc { - let OpName = name; - let OpCode = opCode; - let Doc = doc; - let Params = params; - let OpClass = opClass; - let OpCategory = opCategory; - let OverloadTypes = oloadTypes; - let Attribute = attrs; - let StatsGroup = statsGroup; -} - -// LLVM intrinsic that DXIL operation maps to. -class LLVMIntrinsic { Intrinsic llvm_intrinsic = llvm_intrinsic_; } - -def Sin : DXILOperation<"Sin", 13, UnaryClass, UnaryFloatCategory, "returns sine(theta) for theta in radians.", - [llvm_half_ty, llvm_float_ty], ReadNone, - [ - DXILOpParameter<0, llvm_anyfloat_ty, "", "operation result">, - DXILOpParameter<1, llvm_i32_ty, "opcode", "DXIL opcode">, - DXILOpParameter<2, llvm_anyfloat_ty, "value", "input value"> - ], - ["floats"]>, - LLVMIntrinsic; - -def UMax : DXILOperation< "UMax", 39, BinaryClass, BinaryUintCategory, "unsigned integer maximum. UMax(a,b) = a > b ? a : b", - [llvm_i16_ty, llvm_i32_ty, llvm_i64_ty], ReadNone, - [ - DXILOpParameter<0, llvm_anyint_ty, "", "operation result">, - DXILOpParameter<1, llvm_i32_ty, "opcode", "DXIL opcode">, - DXILOpParameter<2, llvm_anyint_ty, "a", "input value">, - DXILOpParameter<3, llvm_anyint_ty, "b", "input value"> - ], - ["uints"]>, - LLVMIntrinsic; - -def ThreadId : DXILOperation< "ThreadId", 93, ThreadIdClass, ComputeIDCategory, "reads the thread ID", [llvm_i32_ty], ReadNone, - [ - DXILOpParameter<0, llvm_i32_ty, "", "thread ID component">, - DXILOpParameter<1, llvm_i32_ty, "opcode", "DXIL opcode">, - DXILOpParameter<2, llvm_i32_ty, "component", "component to read (x,y,z)"> - ]>, - LLVMIntrinsic; - -def GroupId : DXILOperation< "GroupId", 94, GroupIdClass, ComputeIDCategory, "reads the group ID (SV_GroupID)", [llvm_i32_ty], ReadNone, - [ - DXILOpParameter<0, llvm_i32_ty, "", "group ID component">, - DXILOpParameter<1, llvm_i32_ty, "opcode", "DXIL opcode">, - DXILOpParameter<2, llvm_i32_ty, 
"component", "component to read"> - ]>, - LLVMIntrinsic; - -def ThreadIdInGroup : DXILOperation< "ThreadIdInGroup", 95, ThreadIdInGroupClass, ComputeIDCategory, - "reads the thread ID within the group (SV_GroupThreadID)", [llvm_i32_ty], ReadNone, - [ - DXILOpParameter<0, llvm_i32_ty, "", "thread ID in group component">, - DXILOpParameter<1, llvm_i32_ty, "opcode", "DXIL opcode">, - DXILOpParameter<2, llvm_i32_ty, "component", "component to read (x,y,z)"> - ]>, - LLVMIntrinsic; - -def FlattenedThreadIdInGroup : DXILOperation< "FlattenedThreadIdInGroup", 96, FlattenedThreadIdInGroupClass, ComputeIDCategory, - "provides a flattened index for a given thread within a given group (SV_GroupIndex)", [llvm_i32_ty], ReadNone, - [ - DXILOpParameter<0, llvm_i32_ty, "", "result">, - DXILOpParameter<1, llvm_i32_ty, "opcode", "DXIL opcode"> - ]>, - LLVMIntrinsic; +// Concrete definition of DXIL Operation mapping to corresponding LLVM intrinsic +def Sin : DXILOpMapping<13, unary, int_sin, + "Returns sine(theta) for theta in radians.">; +def UMax : DXILOpMapping<39, binary, int_umax, + "Unsigned integer maximum. UMax(a,b) = a > b ? 
a : b">; +def ThreadId : DXILOpMapping<93, threadId, int_dx_thread_id, + "Reads the thread ID">; +def GroupId : DXILOpMapping<94, groupId, int_dx_group_id, + "Reads the group ID (SV_GroupID)">; +def ThreadIdInGroup : DXILOpMapping<95, threadIdInGroup, + int_dx_thread_id_in_group, + "Reads the thread ID within the group " + "(SV_GroupThreadID)">; +def FlattenedThreadIdInGroup : DXILOpMapping<96, flattenedThreadIdInGroup, + int_dx_flattened_thread_id_in_group, + "Provides a flattened index for a " + "given thread within a given " + "group (SV_GroupIndex)">; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 42180a865b72e3..21a20d45b922d9 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -221,12 +221,26 @@ static Type *getTypeFromParameterKind(ParameterKind Kind, Type *OverloadTy) { return nullptr; } +/// Construct DXIL function type. This is the type of a function with +/// the following prototype +/// OverloadType dx.op..(int opcode, ) +/// are constructed from types in Prop. +/// \param Prop Structure containing DXIL Operation properties based on +/// its specification in DXIL.td. +/// \param OverloadTy Return type to be used to construct DXIL function type. 
static FunctionType *getDXILOpFunctionType(const OpCodeProperty *Prop, Type *OverloadTy) { SmallVector ArgTys; auto ParamKinds = getOpCodeParameterKind(*Prop); + // Add OverloadTy as return type of the function + ArgTys.emplace_back(OverloadTy); + + // Add DXIL Opcode value type viz., Int32 as first argument + ArgTys.emplace_back(Type::getInt32Ty(OverloadTy->getContext())); + + // Add DXIL Operation parameter types as specified in DXIL properties for (unsigned I = 0; I < Prop->NumOfParameters; ++I) { ParameterKind Kind = ParamKinds[I]; ArgTys.emplace_back(getTypeFromParameterKind(Kind, OverloadTy)); @@ -267,13 +281,13 @@ CallInst *DXILOpBuilder::createDXILOpCall(dxil::OpCode OpCode, Type *OverloadTy, return B.CreateCall(Fn, FullArgs); } -Type *DXILOpBuilder::getOverloadTy(dxil::OpCode OpCode, FunctionType *FT, - bool NoOpCodeParam) { +Type *DXILOpBuilder::getOverloadTy(dxil::OpCode OpCode, FunctionType *FT) { const OpCodeProperty *Prop = getOpCodeProperty(OpCode); + // If DXIL Op has no overload parameter, just return the + // precise return type specified. if (Prop->OverloadParamIndex < 0) { auto &Ctx = FT->getContext(); - // When only has 1 overload type, just return it. switch (Prop->OverloadTys) { case OverloadKind::VOID: return Type::getVoidTy(Ctx); @@ -302,9 +316,8 @@ Type *DXILOpBuilder::getOverloadTy(dxil::OpCode OpCode, FunctionType *FT, // Prop->OverloadParamIndex is 0, overload type is FT->getReturnType(). Type *OverloadType = FT->getReturnType(); if (Prop->OverloadParamIndex != 0) { - // Skip Return Type and Type for DXIL opcode. - const unsigned SkipedParam = NoOpCodeParam ? 2 : 1; - OverloadType = FT->getParamType(Prop->OverloadParamIndex - SkipedParam); + // Skip Return Type. 
+ OverloadType = FT->getParamType(Prop->OverloadParamIndex - 1); } auto ParamKinds = getOpCodeParameterKind(*Prop); diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h index 940ed538c7ce15..1c15f109184adf 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.h +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h @@ -31,8 +31,7 @@ class DXILOpBuilder { DXILOpBuilder(Module &M, IRBuilderBase &B) : M(M), B(B) {} CallInst *createDXILOpCall(dxil::OpCode OpCode, Type *OverloadTy, llvm::iterator_range Args); - Type *getOverloadTy(dxil::OpCode OpCode, FunctionType *FT, - bool NoOpCodeParam); + Type *getOverloadTy(dxil::OpCode OpCode, FunctionType *FT); static const char *getOpCodeName(dxil::OpCode DXILOp); private: diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f6e2297e9af41f..6b649b76beecdf 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -33,8 +33,7 @@ static void lowerIntrinsic(dxil::OpCode DXILOp, Function &F, Module &M) { IRBuilder<> B(M.getContext()); Value *DXILOpArg = B.getInt32(static_cast(DXILOp)); DXILOpBuilder DXILB(M, B); - Type *OverloadTy = - DXILB.getOverloadTy(DXILOp, F.getFunctionType(), /*NoOpCodeParam*/ true); + Type *OverloadTy = DXILB.getOverloadTy(DXILOp, F.getFunctionType()); for (User *U : make_early_inc_range(F.users())) { CallInst *CI = dyn_cast(U); if (!CI) diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 3b8234c0118435..4c18e076c43936 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1002,7 +1002,7 @@ namespace { bool DeadCodeElimination::isDead(unsigned R) const { for (const MachineOperand &MO : MRI.use_operands(R)) { const MachineInstr *UseI = MO.getParent(); - if (UseI->isDebugValue()) + if (UseI->isDebugInstr()) continue; if (UseI->isPHI()) { 
assert(!UseI->getOperand(0).getSubReg()); diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index e38c8bacaf2bab..56472d633694ae 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -1180,7 +1180,7 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { bool InsertBeforeBundle; if (MI.isInlineAsm()) InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI); - else if (MI.isDebugValue()) + else if (MI.isDebugInstr()) InsertBeforeBundle = true; else continue; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index b375e8bb4b8fac..cdf7c048a4bf11 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -31,12 +31,13 @@ using namespace llvm; // This part is for ELF object output. RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) - : RISCVTargetStreamer(S), CurrentVendor("riscv"), STI(STI) { + : RISCVTargetStreamer(S), CurrentVendor("riscv") { MCAssembler &MCA = getStreamer().getAssembler(); const FeatureBitset &Features = STI.getFeatureBits(); auto &MAB = static_cast(MCA.getBackend()); setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features, MAB.getTargetOptions().getABIName())); + setFlagsFromFeatures(STI); // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a // relocation to ensure the jump target is correct after linking. 
This is due // to a limitation that shouldForceRelocation has to make the decision upfront @@ -91,10 +92,9 @@ void RISCVTargetELFStreamer::finish() { unsigned EFlags = MCA.getELFHeaderEFlags(); - if (STI.hasFeature(RISCV::FeatureStdExtC) || - STI.hasFeature(RISCV::FeatureStdExtZca)) + if (hasRVC()) EFlags |= ELF::EF_RISCV_RVC; - if (STI.hasFeature(RISCV::FeatureStdExtZtso)) + if (hasTSO()) EFlags |= ELF::EF_RISCV_TSO; switch (ABI) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index a6f54bf67b5d2b..e8f29cd8449ba0 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -46,7 +46,6 @@ class RISCVTargetELFStreamer : public RISCVTargetStreamer { StringRef CurrentVendor; MCSection *AttributeSection = nullptr; - const MCSubtargetInfo &STI; void emitAttribute(unsigned Attribute, unsigned Value) override; void emitTextAttribute(unsigned Attribute, StringRef String) override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 071a3a5aa5d6e7..4a4b1e13c2b9ec 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -48,6 +48,12 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) { TargetABI = ABI; } +void RISCVTargetStreamer::setFlagsFromFeatures(const MCSubtargetInfo &STI) { + HasRVC = STI.hasFeature(RISCV::FeatureStdExtC) || + STI.hasFeature(RISCV::FeatureStdExtZca); + HasTSO = STI.hasFeature(RISCV::FeatureStdExtZtso); +} + void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign) { if (EmitStackAlign) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h index 070e72fb157ae9..cb8bc21cb63557 100644 --- 
a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h @@ -33,6 +33,8 @@ struct RISCVOptionArchArg { class RISCVTargetStreamer : public MCTargetStreamer { RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown; + bool HasRVC = false; + bool HasTSO = false; public: RISCVTargetStreamer(MCStreamer &S); @@ -58,6 +60,9 @@ class RISCVTargetStreamer : public MCTargetStreamer { void emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign); void setTargetABI(RISCVABI::ABI ABI); RISCVABI::ABI getTargetABI() const { return TargetABI; } + void setFlagsFromFeatures(const MCSubtargetInfo &STI); + bool hasRVC() const { return HasRVC; } + bool hasTSO() const { return HasTSO; } }; // This part is for ascii assembly output diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 040cec42674000..0430d603620b6a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -198,6 +198,7 @@ def SiFive7Model : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = 0; + let PostRAScheduler = true; let EnableIntervals = true; let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne, diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index c8f80ced354538..5cbd9ab4dc2d6c 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2732,13 +2732,15 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, insertDAGNode(*CurDAG, N, Zext); SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt); insertDAGNode(*CurDAG, N, NewShl); + CurDAG->ReplaceAllUsesWith(N, NewShl); + CurDAG->RemoveDeadNode(N.getNode()); // Convert the shift to scale factor. 
AM.Scale = 1 << ShAmtV; - AM.IndexReg = Zext; - - CurDAG->ReplaceAllUsesWith(N, NewShl); - CurDAG->RemoveDeadNode(N.getNode()); + // If matchIndexRecursively is not called here, + // Zext may be replaced by other nodes but later used to call a builder + // method + AM.IndexReg = matchIndexRecursively(Zext, AM, Depth + 1); return false; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 93088c7cde938b..b807a97d6e4851 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41262,6 +41262,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( KnownZero = LHSZero; break; } + case X86ISD::PCMPEQ: + case X86ISD::PCMPGT: { + APInt LHSUndef, LHSZero; + APInt RHSUndef, RHSZero; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO, + Depth + 1)) + return true; + break; + } case X86ISD::KSHIFTL: { SDValue Src = Op.getOperand(0); auto *Amt = cast(Op.getOperand(1)); @@ -41402,7 +41416,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( break; } case X86ISD::CVTSI2P: - case X86ISD::CVTUI2P: { + case X86ISD::CVTUI2P: + case X86ISD::CVTPH2PS: + case X86ISD::CVTPS2PH: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); APInt SrcUndef, SrcZero; diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 6ce9eb3656c93a..490cb7e528eb6f 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -905,8 +905,8 @@ static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR, for (Operand &Op : Ops) { if (Op.D == Domain::NonNegative) continue; - auto *BO = - 
BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", + SDI->getIterator()); BO->setDebugLoc(SDI->getDebugLoc()); Op.V = BO; } @@ -919,7 +919,8 @@ static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR, // If the divident was non-positive, we need to negate the result. if (Ops[0].D == Domain::NonPositive) { - Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", + SDI->getIterator()); Res->setDebugLoc(SDI->getDebugLoc()); } @@ -966,8 +967,8 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR, for (Operand &Op : Ops) { if (Op.D == Domain::NonNegative) continue; - auto *BO = - BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", + SDI->getIterator()); BO->setDebugLoc(SDI->getDebugLoc()); Op.V = BO; } @@ -981,7 +982,8 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR, // If the operands had two different domains, we need to negate the result. 
if (Ops[0].D != Ops[1].D) { - Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", + SDI->getIterator()); Res->setDebugLoc(SDI->getDebugLoc()); } diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 818c7b40d489ef..61109ed3765987 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -270,7 +270,8 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, } static Instruction *CreateNeg(Value *S1, const Twine &Name, - Instruction *InsertBefore, Value *FlagsOp) { + BasicBlock::iterator InsertBefore, + Value *FlagsOp) { if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateNeg(S1, Name, InsertBefore); @@ -958,7 +959,8 @@ static Value *NegateValue(Value *V, Instruction *BI, // Insert a 'neg' instruction that subtracts the value from zero to get the // negation. - Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI); + Instruction *NewNeg = + CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI); ToRedo.insert(NewNeg); return NewNeg; } @@ -1246,7 +1248,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { } if (NeedsNegate) - V = CreateNeg(V, "neg", &*InsertPt, BO); + V = CreateNeg(V, "neg", InsertPt, BO); return V; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4f2e4d11824ed4..5c1e8bebbacf11 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8987,11 +8987,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( BasicBlock *BB = CurrentLinkI->getParent(); VPValue *CondOp = nullptr; - if (CM.blockNeedsPredicationForAnyReason(BB)) { - VPBuilder::InsertPointGuard Guard(Builder); - Builder.setInsertPoint(CurrentLink); + if (CM.blockNeedsPredicationForAnyReason(BB)) 
CondOp = RecipeBuilder.getBlockInMask(BB); - } VPReductionRecipe *RedRecipe = new VPReductionRecipe( RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2b7d518c1c1a78..94b7c4952f055e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5158,7 +5158,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { OrderedEntries.insert(Data.first); } } else { - reorderOrder(Data.first->ReorderIndices, Mask, /*BottomOrder=*/true); + reorderOrder(Data.first->ReorderIndices, Mask); } } } @@ -8102,7 +8102,7 @@ const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E, unsigned Idx) const { Value *Op = E->getOperand(Idx).front(); if (const TreeEntry *TE = getTreeEntry(Op)) { - if (find_if(E->UserTreeIndices, [&](const EdgeInfo &EI) { + if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) { return EI.EdgeIdx == Idx && EI.UserTE == E; }) != TE->UserTreeIndices.end()) return TE; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll index ef8e4665364086..42f6570047fc7a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -26,7 +26,7 @@ define void @asm_simple_register_clobber() { define i64 @asm_register_early_clobber() { ; CHECK-LABEL: name: asm_register_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 2752523 /* regdef-ec:GPR64common */, def early-clobber %0, 2752523 /* regdef-ec:GPR64common */, def early-clobber %1, !0 + ; CHECK-NEXT: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef-ec:GPR64common */, def early-clobber %0, {{[0-9]+}} /* regdef-ec:GPR64common */, def early-clobber %1, !0 
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]] @@ -54,7 +54,7 @@ entry: define i32 @test_single_register_output() nounwind ssp { ; CHECK-LABEL: name: test_single_register_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0 + ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -66,7 +66,7 @@ entry: define i64 @test_single_register_output_s64() nounwind ssp { ; CHECK-LABEL: name: test_single_register_output_s64 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"mov $0, 7", 0 /* attdialect */, 2752522 /* regdef:GPR64common */, def %0 + ; CHECK-NEXT: INLINEASM &"mov $0, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY %0 ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 @@ -79,7 +79,7 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0, 1703946 /* regdef:GPR32common */, def %1 + ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0, {{[0-9]+}} /* regdef:GPR32common */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] @@ -96,7 +96,7 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: 
bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0, 2555914 /* regdef:FPR64 */, def %1 + ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0, {{[0-9]+}} /* regdef:FPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) @@ -125,7 +125,7 @@ define zeroext i8 @test_register_output_trunc(ptr %src) nounwind { ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %1 + ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -155,7 +155,7 @@ define void @test_input_register_imm() { ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[C]](s64) - ; CHECK-NEXT: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 2752521 /* reguse:GPR64common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:GPR64common */, [[COPY]] ; CHECK-NEXT: RET_ReallyLR call void asm sideeffect "mov x0, $0", "r"(i64 42) ret void @@ -190,7 +190,7 @@ define zeroext i8 @test_input_register(ptr %src) nounwind { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) - ; CHECK-NEXT: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %1, 2752521 /* reguse:GPR64common */, [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 
{{[0-9]+}} /* regdef:GPR32common */, def %1, {{[0-9]+}} /* reguse:GPR64common */, [[COPY1]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -207,7 +207,7 @@ define i32 @test_memory_constraint(ptr %a) nounwind { ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 1703946 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0) + ; CHECK-NEXT: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 @@ -221,7 +221,7 @@ define i16 @test_anyext_input() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1703946 /* regdef:GPR32common */, def %0, 1703945 /* reguse:GPR32common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0, {{[0-9]+}} /* reguse:GPR32common */, [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) @@ -237,7 +237,7 @@ define i16 @test_anyext_input_with_matching_constraint() { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1703946 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3) + ; 
CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll index 59eb80ae6146b3..fbffb50bcbc8a3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll @@ -71,7 +71,7 @@ define void @test2() #0 personality ptr @__gcc_personality_v0 { ; CHECK-NEXT: G_INVOKE_REGION_START ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[DEF]](p0) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2752521 /* reguse:GPR64common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:GPR64common */, [[COPY]] ; CHECK-NEXT: EH_LABEL ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir index 4879ffd28784c1..63a26dcfea4762 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -287,39 +287,47 @@ body: | ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1 ; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2 - ; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1 - ; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2 ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32) - ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %cmp(s1) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXT]], 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[SEXT_INREG]](s32) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C2]](s64) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[C3]](s8) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: 
[[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: %select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>) - ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[TRUNC]](s16), [[C1]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), 
[[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<8 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s8>), [[UV5:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C2]](s16) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C2]](s16) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY3]](s16), [[COPY4]](s16), [[COPY5]](s16), [[C2]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC6]], [[XOR]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) + ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT2]], [[BUILD_VECTOR4]] ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %w0:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll index 8ed7059d2e754c..58299696e78fc2 100644 --- 
a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll @@ -5,7 +5,7 @@ entry: ; CHECK: %0:ppr = COPY $p0 ; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) ; CHECK: %1:pnr_p8to15 = COPY %0 -; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, 458761 /* reguse:PNR_p8to15 */, %1 +; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_p8to15 */, %1 ; CHECK: RET_ReallyLR %predcnt.addr = alloca target("aarch64.svcount"), align 2 store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2 @@ -19,7 +19,7 @@ entry: ; CHECK: %0:ppr = COPY $p0 ; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) ; CHECK: %1:pnr = COPY %0 -; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, 262153 /* reguse:PNR */, %1 +; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR */, %1 ; CHECK: RET_ReallyLR %predcnt.addr = alloca target("aarch64.svcount"), align 2 store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2 @@ -33,7 +33,7 @@ entry: ; CHECK: %0:ppr = COPY $p0 ; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2) ; CHECK: %1:pnr_3b = COPY %0 -; CHECK: INLINEASM &"fadd z0.h, $0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, 393225 /* reguse:PNR_3b */, %1 +; CHECK: INLINEASM &"fadd z0.h, $0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_3b */, %1 ; CHECK: RET_ReallyLR %predcnt.addr = alloca target("aarch64.svcount"), align 2 store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2 diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll b/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll index 
3b7b5dd3fa7a54..fbe89e70e4d8e2 100644 --- a/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll +++ b/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll @@ -18,7 +18,7 @@ define i32 @test0() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %5 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -31,7 +31,7 @@ define i32 @test0() { ; CHECK-NEXT: bb.2.direct: ; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %7, 13 /* imm */, %bb.3 + ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %7, 13 /* imm */, %bb.3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %7 ; CHECK-NEXT: B %bb.4 ; CHECK-NEXT: {{ $}} @@ -107,7 +107,7 @@ define i32 @dont_split1() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %1, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %1, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %1 ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -168,7 +168,7 @@ define i32 @dont_split3() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0, 13 /* 
imm */, %bb.2 ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.x: @@ -194,7 +194,7 @@ define i32 @split_me0() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -244,7 +244,7 @@ define i32 @split_me1(i1 %z) { ; CHECK-NEXT: bb.1.w: ; CHECK-NEXT: successors: %bb.3(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY %5 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -297,7 +297,7 @@ define i32 @split_me2(i1 %z) { ; CHECK-NEXT: bb.1.w: ; CHECK-NEXT: successors: %bb.3(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %6, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %6, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %6 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -340,7 +340,7 @@ define i32 @dont_split4() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %3, 13 /* imm */, 
%bb.2 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -379,7 +379,7 @@ define i32 @dont_split5() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -410,7 +410,7 @@ define i32 @split_me3() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -456,7 +456,7 @@ define i32 @dont_split6(i32 %0) { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %2, %bb.2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32common = COPY [[PHI]] - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3), 13 /* imm */, %bb.2 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3), 13 /* imm */, %bb.2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %4 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} @@ -491,7 +491,7 @@ define i32 @split_me4() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: 
INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} @@ -522,7 +522,7 @@ define i32 @split_me5() { ; CHECK: bb.0.entry: ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 + ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir index d44d45ea03b6bc..aca2816225e3ee 100644 --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-declare.mir @@ -193,7 +193,7 @@ body: | liveins: $z0, $z1, $p0, $p1, $w0 renamable $p2 = COPY killed $p0 - renamable $p0 = PTRUE_S 31 + renamable $p0 = PTRUE_S 31, implicit $vg ST1W_IMM killed renamable $z0, renamable $p0, %stack.0.z0.addr, 0 :: (store unknown-size into %ir.z0.addr, align 16) ST1W_IMM killed renamable $z1, renamable $p0, %stack.1.z1.addr, 0 :: (store unknown-size into %ir.z1.addr, align 16) STR_PXI killed renamable $p2, %stack.2.p0.addr, 0 :: (store unknown-size into %ir.p0.addr, align 2) diff --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir index 75917ef32ae2ad..0ea180b20730f2 100644 --- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir +++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir @@ -111,7 +111,7 @@ body: | STRXui killed renamable $x1, %stack.1, 0, debug-location !8 DBG_VALUE %stack.1, $noreg, !11, !DIExpression(DW_OP_constu, 16, DW_OP_plus, DW_OP_deref), debug-location !8 - renamable $p2 = PTRUE_S 31, 
debug-location !DILocation(line: 4, column: 1, scope: !5) + renamable $p2 = PTRUE_S 31, implicit $vg, debug-location !DILocation(line: 4, column: 1, scope: !5) ST1W_IMM renamable $z0, renamable $p2, %stack.2, 0, debug-location !DILocation(line: 5, column: 1, scope: !5) DBG_VALUE %stack.2, $noreg, !12, !DIExpression(DW_OP_deref), debug-location !DILocation(line: 5, column: 1, scope: !5) ST1W_IMM renamable $z1, killed renamable $p2, %stack.3, 0, debug-location !DILocation(line: 6, column: 1, scope: !5) diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir index 483dbd2f14d556..92fb053b0db726 100644 --- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir +++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir @@ -91,10 +91,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2 ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %4, 
2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2 ; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir index 8903ca2b865b98..612453ab53f438 100644 --- a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir +++ b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir @@ -145,7 +145,7 @@ body: | liveins: $z1 ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp, debug-location !34 - renamable $p0 = PTRUE_S 31, debug-location !34 + renamable $p0 = PTRUE_S 31, implicit $vg, debug-location !34 $x0 = ADDXri %stack.0, 0, 0, debug-location !34 ST1W_IMM renamable $z1, killed renamable $p0, %stack.0, 0, debug-location !34 :: (store unknown-size into %stack.0, align 16) $z0 = COPY renamable $z1, debug-location !34 @@ -157,7 +157,7 @@ body: | $z7 = COPY renamable $z1, debug-location !34 BL @bar, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7, implicit $x0, implicit-def $sp, implicit-def $z0, implicit-def $z1, debug-location !34 ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp, debug-location !34 - renamable $p0 = PTRUE_S 31, debug-location !34 + renamable $p0 = PTRUE_S 31, implicit $vg, debug-location !34 $z3 = IMPLICIT_DEF renamable $z1 = LD1W_IMM renamable $p0, %stack.0, 0, debug-location !34 :: (load unknown-size from %stack.0, align 16) ST1W_IMM renamable $z3, killed renamable $p0, %stack.0, 0 :: (store unknown-size into %stack.0, align 16) diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir index 041b2dc6af1277..65148344096cd7 100644 --- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir +++ 
b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir @@ -487,7 +487,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]] - ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2359306 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %1, 262158 /* mem:m */, killed [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %1, 262158 /* mem:m */, killed [[COPY1]] ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll index 46b714d8e5fbbe..bb9546af8bb7b6 100644 --- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll +++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll @@ -21,9 +21,7 @@ define noundef i1 @logger(i32 noundef %logLevel, ptr %ea, ptr %pll) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %land.rhs ; CHECK-NEXT: ldr x8, [x1] -; CHECK-NEXT: ldrb w8, [x8] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ldrb w0, [x8] ; CHECK-NEXT: ret entry: %0 = load i32, ptr %pll, align 4 diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll new file mode 100644 index 00000000000000..5638347ee63340 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -0,0 +1,645 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: 
Instruction selection used fallback path for shufflevector_v2i1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8_zeroes + +; ===== Legal Vector Types ===== + +define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-SD-LABEL: shufflevector_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: adrp x8, .LCPI0_0 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: adrp x8, .LCPI0_0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret + %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %c +} + +define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-SD-LABEL: shufflevector_v16i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI1_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v16i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI1_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-GI-NEXT: // 
kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: shufflevector_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ret + %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %c +} + +define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-SD-LABEL: shufflevector_v8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI3_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI3_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: shufflevector_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ret + %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %c +} + +define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: 
shufflevector_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +; ===== Legal Vector Types with Zero Masks ===== + +define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: shufflevector_v8i8_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.8b, v0.b[0] +; CHECK-NEXT: ret + %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %c +} + +define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_v16i8_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.16b, v0.b[0] +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: shufflevector_v4i16_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ret + %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %c +} + +define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_v8i16_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: shufflevector_v2i32_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %c +} + +define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_v4i32_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> 
%b, <4 x i32> + ret <4 x i32> %c +} + +define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_v2i64_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== + +define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ +; CHECK-LABEL: shufflevector_v2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v0.s[1], v1.s[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret + %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> + ret <2 x i1> %c +} + +define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ +; CHECK-SD-LABEL: shufflevector_v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6 +; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h +; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4 +; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov h3, v1.h[1] +; CHECK-GI-NEXT: adrp x8, .LCPI15_0 +; CHECK-GI-NEXT: mov h4, v0.h[2] +; CHECK-GI-NEXT: mov h5, v0.h[3] +; CHECK-GI-NEXT: mov h6, v1.h[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov h2, v1.h[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: mov v0.b[4], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[4], v0.b[0] +; CHECK-GI-NEXT: mov 
v0.b[5], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[5], v0.b[0] +; CHECK-GI-NEXT: mov v0.b[6], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[6], v0.b[0] +; CHECK-GI-NEXT: mov v0.b[7], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[7], v0.b[0] +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0] +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + %d = bitcast <4 x i8> %c to i32 + ret i32 %d +} + +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){ +; CHECK-SD-LABEL: shufflevector_v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 +; CHECK-SD-NEXT: adrp x8, .LCPI16_0 +; CHECK-SD-NEXT: adrp x9, .LCPI16_1 +; CHECK-SD-NEXT: mov v1.16b, v0.16b +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1] +; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: adrp x8, .LCPI16_1 +; CHECK-GI-NEXT: adrp x9, .LCPI16_0 +; CHECK-GI-NEXT: mov v4.16b, v2.16b +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] +; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0] +; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ +; CHECK-SD-LABEL: shufflevector_v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [sp, #12] +; CHECK-SD-NEXT: strh w8, [sp, #14] +; CHECK-SD-NEXT: ldr w0, [sp, #12] +; 
CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov s3, v1.s[1] +; CHECK-GI-NEXT: adrp x8, .LCPI17_0 +; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v0.h[0] +; CHECK-GI-NEXT: mov v1.h[2], v0.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] +; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0] +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret + %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> + %d = bitcast <2 x i16> %c to i32 + ret i32 %d +} + +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){ +; CHECK-SD-LABEL: shufflevector_v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 +; CHECK-SD-NEXT: adrp x8, .LCPI18_0 +; CHECK-SD-NEXT: adrp x9, .LCPI18_1 +; CHECK-SD-NEXT: mov v1.16b, v0.16b +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] +; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1] +; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: adrp x8, .LCPI18_1 +; CHECK-GI-NEXT: adrp x9, .LCPI18_0 +; CHECK-GI-NEXT: mov v4.16b, v2.16b +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] +; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0] +; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> 
%b) { +; CHECK-LABEL: shufflevector_v1i32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: ret + %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> + ret <1 x i32> %c +} + +define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-SD-LABEL: shufflevector_v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: mov v2.s[3], v3.s[3] +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI20_0 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0] +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-SD-LABEL: shufflevector_v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d +; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d +; CHECK-GI-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +; ===== Smaller/Larger Width Vectors with Zero Masks ===== + +define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){ +; CHECK-LABEL: shufflevector_v2i1_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> + ret <2 x i1> %c +} + +define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){ +; 
CHECK-SD-LABEL: shufflevector_v4i8_zeroes: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: dup v0.4h, v0.h[0] +; CHECK-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v4i8_zeroes: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: dup v0.8b, w8 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret + %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> + %d = bitcast <4 x i8> %c to i32 + ret i32 %d +} + +define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){ +; CHECK-LABEL: shufflevector_v32i8_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.16b, v0.b[0] +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){ +; CHECK-SD-LABEL: shufflevector_v2i16_zeroes: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: dup v1.2s, v0.s[0] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [sp, #12] +; CHECK-SD-NEXT: mov w8, v1.s[1] +; CHECK-SD-NEXT: strh w8, [sp, #14] +; CHECK-SD-NEXT: ldr w0, [sp, #12] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i16_zeroes: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: dup v0.4h, w8 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret + %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> + %d = bitcast <2 x i16> %c to i32 + ret i32 %d +} + +define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){ +; CHECK-LABEL: shufflevector_v16i16_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: mov v1.16b, 
v0.16b +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) { +; CHECK-LABEL: shufflevector_v1i32_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> + ret <1 x i32> %c +} + +define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_v8i32_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_v4i64_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +; ===== Vectors with Non-Pow 2 Widths ===== + +define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { +; CHECK-LABEL: shufflevector_v3i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: mov w1, w2 +; CHECK-NEXT: mov w2, w4 +; CHECK-NEXT: ret + %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> + ret <3 x i8> %c +} + +define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { +; CHECK-SD-LABEL: shufflevector_v7i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: adrp x8, .LCPI31_0 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI31_0] +; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v7i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: adrp x8, .LCPI31_0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; 
CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI31_0] +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret + %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> + ret <7 x i8> %c +} + +define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { +; CHECK-SD-LABEL: shufflevector_v3i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: zip1 v1.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: zip2 v0.4h, v1.4h, v0.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v3i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI32_0] +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret + %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> + ret <3 x i16> %c +} + +define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { +; CHECK-SD-LABEL: shufflevector_v7i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI33_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v7i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> + ret <7 x i16> %c +} + +define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { +; CHECK-SD-LABEL: 
shufflevector_v3i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: zip1 v1.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: zip2 v0.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v3i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret + %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %c +} + +; ===== Vectors with Non-Pow 2 Widths with Zero Masks ===== + +define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { +; CHECK-LABEL: shufflevector_v3i8_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w1, w0 +; CHECK-NEXT: mov w2, w0 +; CHECK-NEXT: ret + %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> + ret <3 x i8> %c +} + +define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) { +; CHECK-LABEL: shufflevector_v7i8_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.8b, v0.b[0] +; CHECK-NEXT: ret + %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> + ret <7 x i8> %c +} + +define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) { +; CHECK-LABEL: shufflevector_v3i16_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ret + %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> + ret <3 x i16> %c +} + +define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) { +; CHECK-LABEL: shufflevector_v7i16_zeroes: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> + ret <7 x i16> %c +} + +define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) { +; CHECK-LABEL: shufflevector_v3i32_zeroes: 
+; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> + ret <3 x i32> %c +} diff --git a/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir b/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir index 3fbb7889c8b79b..6063c8dfc792c9 100644 --- a/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir +++ b/llvm/test/CodeGen/AArch64/sve-localstackalloc.mir @@ -48,7 +48,7 @@ body: | %2:gpr32 = COPY $w0 %1:zpr = COPY $z1 %0:zpr = COPY $z0 - %5:ppr_3b = PTRUE_B 31 + %5:ppr_3b = PTRUE_B 31, implicit $vg %6:gpr64sp = ADDXri %stack.0, 0, 0 ST1B_IMM %1, %5, %6, 1 :: (store unknown-size, align 16) ST1B_IMM %0, %5, %stack.0, 0 :: (store unknown-size into %stack.0, align 16) diff --git a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir index b76fe7821b6c69..8395a7619fbb46 100644 --- a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir +++ b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir @@ -11,15 +11,15 @@ body: | ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ppr = COPY $p0 - ; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE + ; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE implicit $vg ; CHECK-NEXT: [[UZP1_PPP_B:%[0-9]+]]:ppr = UZP1_PPP_B [[COPY]], [[PFALSE]] ; CHECK-NEXT: [[UZP1_PPP_B1:%[0-9]+]]:ppr = UZP1_PPP_B killed [[UZP1_PPP_B]], [[PFALSE]] ; CHECK-NEXT: $p0 = COPY [[UZP1_PPP_B1]] ; CHECK-NEXT: RET_ReallyLR implicit $p0 %0:ppr = COPY $p0 - %2:ppr = PFALSE + %2:ppr = PFALSE implicit $vg %3:ppr = UZP1_PPP_B %0, %2 - %4:ppr = PFALSE + %4:ppr = PFALSE implicit $vg %5:ppr = UZP1_PPP_B killed %3, %4 $p0 = COPY %5 RET_ReallyLR implicit $p0 diff --git a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir index df0e50de4d1a7a..ae70f91a4ec641 100644 --- a/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir +++ 
b/llvm/test/CodeGen/AArch64/sve-pseudos-expand-undef.mir @@ -26,7 +26,7 @@ body: | name: expand_mls_to_msb body: | bb.0: - renamable $p0 = PTRUE_B 31 + renamable $p0 = PTRUE_B 31, implicit $vg renamable $z0 = MLS_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... @@ -36,7 +36,7 @@ body: | name: expand_mla_to_mad body: | bb.0: - renamable $p0 = PTRUE_B 31 + renamable $p0 = PTRUE_B 31, implicit $vg renamable $z0 = MLA_ZPZZZ_B_UNDEF killed renamable $p0, killed renamable $z2, killed renamable $z0, killed renamable $z1 RET_ReallyLR implicit $z0 ... diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir index 81318aa5c2a58d..5169113697dc60 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir @@ -174,7 +174,7 @@ body: | %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - %3:ppr = PTRUE_B 31 + %3:ppr = PTRUE_B 31, implicit $vg PTEST_PP killed %3, killed %2, implicit-def $nzcv %4:gpr32 = COPY $wzr %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv @@ -409,14 +409,14 @@ body: | ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_not_all_active ; CHECK: %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - ; CHECK-NEXT: %3:ppr = PTRUE_B 0 + ; CHECK-NEXT: %3:ppr = PTRUE_B 0, implicit $vg ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv ; CHECK-NEXT: %4:gpr32 = COPY $wzr ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - %3:ppr = PTRUE_B 0 + %3:ppr = PTRUE_B 0, implicit $vg PTEST_PP killed %3, killed %2, implicit-def $nzcv %4:gpr32 = COPY $wzr %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv @@ -446,14 +446,14 @@ body: | ; CHECK-LABEL: name: cmpeq_imm_nxv16i8_ptest_of_halfs ; CHECK: %2:ppr = 
CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - ; CHECK-NEXT: %3:ppr = PTRUE_H 31 + ; CHECK-NEXT: %3:ppr = PTRUE_H 31, implicit $vg ; CHECK-NEXT: PTEST_PP killed %3, killed %2, implicit-def $nzcv ; CHECK-NEXT: %4:gpr32 = COPY $wzr ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:zpr = COPY $z0 %0:ppr_3b = COPY $p0 %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv - %3:ppr = PTRUE_H 31 + %3:ppr = PTRUE_H 31, implicit $vg PTEST_PP killed %3, killed %2, implicit-def $nzcv %4:gpr32 = COPY $wzr %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir index 8f7467d99154e3..c1d9dfff73447c 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEGE_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEGE_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; 
CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEGE_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEGE_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEGE_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEGE_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 0 + %2:ppr = PTRUE_B 0, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, 
$wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEGE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir index 217d984560e36c..c6df21f85db773 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGT_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg, implicit $vg %3:ppr = WHILEGT_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg, implicit $vg %4:ppr = WHILEGT_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg, implicit $vg %4:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg, implicit $vg %4:ppr = WHILEGT_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST 
%1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg, implicit $vg %4:ppr = WHILEGT_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg, implicit $vg %4:ppr = WHILEGT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg, implicit $vg %4:ppr = WHILEGT_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 1 + %2:ppr = PTRUE_H 1, implicit $vg, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEGT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, 
implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir index 8d6f466c6b735d..7d8aed3c325a01 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHI_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHI_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHI_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHI_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHI_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = 
COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHI_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHI_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 29 + %2:ppr = PTRUE_S 29, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEHI_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir index da76a30f843b7a..f4dbfbc3db1cab 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir +++ 
b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHS_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHS_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHS_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEHS_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEHS_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHS_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = 
COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 30 + %2:ppr = PTRUE_D 30, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEHS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir index 32954d593c1ddd..dc2265490cb55e 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, 
implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELE_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELE_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELE_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELE_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELE_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELE_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELE_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 7 
+ %2:ppr = PTRUE_B 7, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILELE_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir index cca0ab8ef210b9..4d66e3e57da8b3 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELO_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELO_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: 
| ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELO_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELO_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELO_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELO_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELO_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 6 + %2:ppr = PTRUE_H 6, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 
31, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILELO_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir index 4bae3a1986f451..ea02f8c70ef86c 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELS_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELS_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELS_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = 
PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELS_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELS_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELS_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELS_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 5 + %2:ppr = PTRUE_S 5, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg 
%3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILELS_PWW_S %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir index 3c6a9e21b4c6c1..d08781f203e328 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELT_PWW_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -63,7 +63,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELT_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -98,7 +98,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELT_PWW_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -133,7 +133,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILELT_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -168,7 +168,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELT_PWW_S %0, 
%1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -203,7 +203,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILELT_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -238,7 +238,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -273,7 +273,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILELT_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_D 4 + %2:ppr = PTRUE_D 4, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -393,7 +393,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -433,7 +433,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr32 = COPY $w1 %0:gpr32 = COPY $w0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = 
WHILELT_PWW_D %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir index 27cdf593df776f..d800009b9537f3 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilerw.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -65,7 +65,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILERW_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -100,7 +100,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILERW_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -135,7 +135,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %4:ppr = WHILERW_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -175,7 +175,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 0 + %2:ppr = PTRUE_B 0, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -215,7 +215,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = 
WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -255,7 +255,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -295,7 +295,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILERW_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir index 3b49b1ec2c8045..9f8b7c3197ecf2 100644 --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilewr.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 31 + %2:ppr = PTRUE_B 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -65,7 +65,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %4:ppr = WHILEWR_PXX_H %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -100,7 +100,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %4:ppr = WHILEWR_PXX_S %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -135,7 +135,7 @@ body: | ; CHECK-NOT: PTEST %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, 
implicit $vg %4:ppr = WHILEWR_PXX_D %0, %1, implicit-def dead $nzcv PTEST_PP %2, %4, implicit-def $nzcv %6:gpr32 = COPY $wzr @@ -175,7 +175,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_B 0 + %2:ppr = PTRUE_B 0, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -215,7 +215,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_H 31 + %2:ppr = PTRUE_H 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -255,7 +255,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_S 31 + %2:ppr = PTRUE_S 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr @@ -295,7 +295,7 @@ body: | ; CHECK-NEXT: %5:gpr32 = CSINCWr %4, $wzr, 0, implicit $nzcv %1:gpr64 = COPY $x1 %0:gpr64 = COPY $x0 - %2:ppr = PTRUE_D 31 + %2:ppr = PTRUE_D 31, implicit $vg %3:ppr = WHILEWR_PXX_B %0, %1, implicit-def dead $nzcv PTEST_PP killed %2, killed %3, implicit-def $nzcv %4:gpr32 = COPY $wzr diff --git a/llvm/test/CodeGen/AArch64/sve2p1_copy_pnr.mir b/llvm/test/CodeGen/AArch64/sve2p1_copy_pnr.mir index d6a87a42a79e00..5e5db2ac4e2079 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1_copy_pnr.mir +++ b/llvm/test/CodeGen/AArch64/sve2p1_copy_pnr.mir @@ -13,10 +13,10 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: pnr_to_ppr - ; CHECK: renamable $pn8 = PTRUE_C_D + ; CHECK: renamable $pn8 = PTRUE_C_D implicit $vg ; CHECK-NEXT: $p0 = ORR_PPzPP $p8, $p8, killed $p8 ; CHECK-NEXT: RET_ReallyLR implicit killed $p0 - renamable $pn8 = PTRUE_C_D + renamable $pn8 = PTRUE_C_D implicit $vg $p0 = COPY killed 
renamable $pn8 RET_ReallyLR implicit killed $p0 @@ -34,10 +34,10 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: ppr_to_pnr - ; CHECK: renamable $p8 = PTRUE_H 31 + ; CHECK: renamable $p8 = PTRUE_H 31, implicit $vg ; CHECK-NEXT: $p0 = ORR_PPzPP $p8, $p8, killed $p8, implicit-def $pn0 ; CHECK-NEXT: RET_ReallyLR implicit killed $pn0 - renamable $p8 = PTRUE_H 31 + renamable $p8 = PTRUE_H 31, implicit $vg $pn0 = COPY killed renamable $p8 RET_ReallyLR implicit killed $pn0 @@ -55,10 +55,10 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: pnr_to_pnr - ; CHECK: renamable $pn8 = PTRUE_C_H + ; CHECK: renamable $pn8 = PTRUE_C_H implicit $vg ; CHECK-NEXT: $p0 = ORR_PPzPP $p8, $p8, killed $p8, implicit-def $pn0 ; CHECK-NEXT: RET_ReallyLR implicit killed $pn0 - renamable $pn8 = PTRUE_C_H + renamable $pn8 = PTRUE_C_H implicit $vg $pn0 = COPY killed renamable $pn8 RET_ReallyLR implicit killed $pn0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll index 0f70c1996d6e02..d4d5cb18bbd30e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; Divergent phis that don't require lowering using lane mask merging @@ -147,32 +147,28 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa ; GFX10-LABEL: divergent_i1_phi_used_inside_loop_bigger_loop_body: ; GFX10: ; 
%bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v1 -; GFX10-NEXT: s_mov_b32 s5, 0 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x3e8 -; GFX10-NEXT: v_mov_b32_e32 v8, s5 +; GFX10-NEXT: v_mov_b32_e32 v8, s4 ; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo ; GFX10-NEXT: s_branch .LBB3_2 ; GFX10-NEXT: .LBB3_1: ; %loop_body ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX10-NEXT: v_cvt_f32_u32_e32 v9, v8 -; GFX10-NEXT: s_xor_b32 s4, s4, -1 +; GFX10-NEXT: s_xor_b32 s5, s5, -1 ; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v9, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s4 -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s6, s6, s4 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_and_b32 s7, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s6, s6, s7 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: .LBB3_2: ; %loop_start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_and_b32_e32 v9, 1, v9 ; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8 ; GFX10-NEXT: s_mov_b32 s7, 1 -; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v9 ; GFX10-NEXT: s_cbranch_vccz .LBB3_4 ; GFX10-NEXT: ; %bb.3: ; %else ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1 @@ -189,7 +185,7 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa ; GFX10-NEXT: flat_store_dword v[4:5], v1 ; GFX10-NEXT: s_branch .LBB3_1 ; GFX10-NEXT: .LBB3_6: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir index 5549c89dc402f8..9b0bd2752b8231 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir @@ -33,6 +33,7 @@ body: | ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} @@ -46,7 +47,8 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY6]](s1), %bb.0, %20(s1), %bb.3 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: @@ -54,12 +56,13 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C3]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -126,9 +129,10 @@ body: | ; GFX10-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1) ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} @@ -137,17 +141,17 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = 
G_SELECT [[COPY6]](s1), [[C4]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -292,19 +296,21 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %39(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.5 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.0, %39(s1), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C3]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI3]](s32), [[C3]] ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -336,26 +342,27 @@ body: | ; 
GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C8]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[COPY10]], [[C8]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI 
[[INTRINSIC_CONVERGENT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY10]](s1), [[C11]], [[C10]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -475,6 +482,7 @@ body: | ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[AND1]](s32) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC1]], [[C5]] + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.2 ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} @@ -487,9 +495,10 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = G_PHI %32(s1), %bb.4, [[C5]](s1), %bb.0 - ; GFX10-NEXT: G_BRCOND [[PHI1]](s1), %bb.5 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY3]](s1), %bb.0, %58(s1), %bb.4 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: G_BRCOND [[COPY4]](s1), %bb.5 ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: @@ -517,6 +526,7 @@ body: | ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[PHI5]](s32), [[AMDGPU_BUFFER_LOAD]] ; GFX10-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] ; GFX10-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s1) + 
; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C10]](s1) ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: @@ -527,7 +537,7 @@ body: | ; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[C11]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI]](s32), %bb.2, [[OR2]](s32), %bb.5 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.2, [[OR2]](s32), %bb.5 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<8 x s32>) ; GFX10-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY1]] ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index e9df20f9688e6d..49c232661c6dc1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; This file contains various tests that have divergent i1s used outside of ; the loop. 
These are lane masks is sgpr and need to have correct value in @@ -137,28 +137,24 @@ define void @divergent_i1_xor_used_outside_loop(float %val, float %pre.cond.val, ; GFX10-LABEL: divergent_i1_xor_used_outside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v1 -; GFX10-NEXT: s_mov_b32 s5, 0 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v1, s5 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo ; GFX10-NEXT: .LBB2_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_and_b32_e32 v4, 1, v4 -; GFX10-NEXT: v_cvt_f32_u32_e32 v5, v1 +; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GFX10-NEXT: s_xor_b32 s5, s5, -1 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX10-NEXT: v_cmp_gt_f32_e64 s4, v5, v0 -; GFX10-NEXT: s_xor_b32 s7, vcc_lo, -1 -; GFX10-NEXT: s_or_b32 s5, s4, s5 -; GFX10-NEXT: v_mov_b32_e32 v4, s7 -; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, s7 -; GFX10-NEXT: s_or_b32 s6, s4, s6 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 +; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo +; GFX10-NEXT: s_and_b32 s7, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s6, s6, s7 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -197,7 +193,7 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts, ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; 
GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, 1 +; GFX10-NEXT: s_mov_b32 s6, -1 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader @@ -332,7 +328,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4 -; GFX10-NEXT: s_mov_b32 s7, 1 +; GFX10-NEXT: s_mov_b32 s7, -1 ; GFX10-NEXT: ; implicit-def: $vgpr5 ; GFX10-NEXT: s_and_saveexec_b32 s8, s4 ; GFX10-NEXT: s_cbranch_execz .LBB4_1 @@ -410,7 +406,7 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa ; GFX10-LABEL: divergent_i1_freeze_used_outside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_mov_b32 s3, 1 +; GFX10-NEXT: s_mov_b32 s3, -1 ; GFX10-NEXT: v_mov_b32_e32 v5, s0 ; GFX10-NEXT: ; implicit-def: $sgpr1 ; GFX10-NEXT: ; implicit-def: $sgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir index ace9bec6e1c2c8..206c0adb6c0c1f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir @@ -175,14 +175,15 @@ body: | ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1 ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: 
[[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY14]](s1), [[C7]], [[C6]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C7]], [[C6]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -255,37 +256,40 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %27(s1), %bb.1 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, %24(s1), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], 
[[C2]] - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY6]], [[C2]] + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS 
intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -349,7 +353,8 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY5]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} @@ -365,26 +370,26 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[C1]](s1), %bb.0, %39(s1), %bb.8 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, %40(s1), %bb.8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY6]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: 
bb.3: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %72(s1), %bb.7 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %61(s1), %bb.7 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %48(s1), %bb.7 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.1, %73(s1), %bb.7 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.1, %62(s1), %bb.7 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.1, %49(s1), %bb.7 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7 ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) @@ -392,14 +397,14 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; 
GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} @@ -407,16 +412,16 @@ body: | ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]] ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]] - ; GFX10-NEXT: 
[[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY14]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} @@ -436,15 +441,15 @@ body: | ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4 ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4 ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]](s1) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) ; GFX10-NEXT: 
[[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]] - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI4]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY18]], [[C9]] + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY17]](s1), [[PHI4]](s32) + ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY19]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 @@ -453,11 +458,11 @@ body: | ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.7 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY19]](s1) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY20]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), 
$exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.2 bb.0: @@ -574,7 +579,7 @@ body: | ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %38(s1), %bb.6 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[DEF1]](s1), %bb.0, %39(s1), %bb.6 ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) @@ -600,9 +605,10 @@ body: | ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]] + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1) ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} @@ -610,21 +616,21 @@ body: | ; GFX10-NEXT: successors: %bb.6(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; 
GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[C2]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[COPY8]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc ; 
GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc @@ -636,9 +642,9 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.6 ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY11]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY12]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: @@ -751,26 +757,27 @@ body: | ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %53(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %42(s1), %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[C1]](s1), %bb.0, %32(s1), %bb.3 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %54(s1), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %43(s1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY5]](s1), %bb.0, 
%33(s1), %bb.3 ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[COPY8]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY8]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: @@ -783,10 +790,10 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: @@ -794,32 +801,32 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[PHI2]](s1), %bb.1, [[DEF2]](s1), %bb.2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]] - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY12]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD 
[[PHI4]], [[C4]] ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]] ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0.000000e+00 ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C6]], [[C5]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY16]](s1), [[C6]], [[C5]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) ; GFX10-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll index 609fff51863a03..1698f84eea5185 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; Simples case, if - then, that requires lane mask merging, ; %phi lane mask will hold %val_A at %A. 
Lanes that are active in %B @@ -43,13 +43,12 @@ define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_and_b32 s0, 1, s0 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 -; GFX10-NEXT: v_cmp_ne_u32_e64 s2, 0, s0 -; GFX10-NEXT: ; implicit-def: $sgpr0 +; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_xor_b32 s1, exec_lo, s1 ; GFX10-NEXT: ; %bb.1: ; %B ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 2, v2 -; GFX10-NEXT: s_andn2_b32 s0, s2, exec_lo +; GFX10-NEXT: s_andn2_b32 s0, s0, exec_lo ; GFX10-NEXT: ; implicit-def: $vgpr2 ; GFX10-NEXT: s_and_b32 s2, exec_lo, vcc_lo ; GFX10-NEXT: s_or_b32 s0, s0, s2 @@ -211,7 +210,7 @@ define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) % ; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1 ; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 ; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo -; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s4, -1 ; GFX10-NEXT: global_load_dword v9, v[9:10], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 @@ -308,7 +307,7 @@ define amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) % ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 ; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v4, v9 ; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v5, v10, vcc_lo -; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: s_mov_b32 s4, -1 ; GFX10-NEXT: global_load_dword v11, v[11:12], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 @@ -318,7 +317,7 @@ define amdgpu_cs void @loop_with_3breaks(ptr addrspace(1) %x, ptr addrspace(1) % ; GFX10-NEXT: ; in Loop: Header=BB4_4 Depth=1 ; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v6, v9 ; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, v7, v10, vcc_lo -; GFX10-NEXT: s_mov_b32 s5, 1 +; GFX10-NEXT: s_mov_b32 s5, -1 ; GFX10-NEXT: global_load_dword v11, v[11:12], off ; GFX10-NEXT: 
s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir index df5505e1b28bbc..8197b072c740b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir @@ -18,9 +18,10 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} @@ -29,18 +30,18 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), 
[[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY7]](s1), [[C4]], [[C3]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -91,18 +92,20 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C]] + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[COPY4]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.3 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[COPY5]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[COPY4]](s1), %bb.0, 
%20(s1), %bb.3 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[COPY6]](s1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) ; GFX10-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_ELSE [[SI_IF]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -111,9 +114,9 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C1]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} @@ -122,19 +125,19 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: 
[[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY5]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI [[COPY7]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C3]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C3]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -368,13 +371,14 @@ body: | ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]] + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, 
implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} @@ -383,9 +387,9 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} @@ -402,21 +406,21 @@ body: | ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; 
GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[C4]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[COPY12]](s1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -560,13 +564,14 @@ body: | ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C6]] + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: 
[[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[COPY11]](s1) ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} @@ -575,9 +580,9 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI1]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} @@ -585,26 +590,27 @@ body: | ; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32) ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64) ; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD2]](s32), [[C9]] + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) ; 
GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP2]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[C4]](s1), %bb.2, %71(s1), %bb.7 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY11]](s1), %bb.2, %72(s1), %bb.7 ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[COPY16]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -621,21 +627,21 @@ body: | ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C11]] ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C12]] - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP3]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP3]](s1) + ; GFX10-NEXT: 
[[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[C7]](s1), %bb.4, [[S_OR_B32_2]](s1), %bb.6 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.4, [[S_OR_B32_2]](s1), %bb.6 ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.6, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[COPY17]](s1) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[COPY19]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} @@ -970,6 +976,7 @@ body: | ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: G_BR %bb.7 ; 
GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: @@ -982,19 +989,19 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %67(s1), %bb.6, %70(s1), %bb.7 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %67(s1), %bb.6, %71(s1), %bb.7 ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %49(s1), %bb.6, %48(s1), %bb.7 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32(s1) = PHI %35(s1), %bb.6, %34(s1), %bb.7 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY9]](s1) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]](s1) + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]](s1) + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]](s1) + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY10]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY8]](s1), %17(s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY9]](s1), %17(s32) + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc ; GFX10-NEXT: 
[[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} @@ -1011,28 +1018,28 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INTRINSIC_CONVERGENT]](s32) ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]] - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1) + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1) ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]] ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]] ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32) ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[DEF5:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %63(s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; 
GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY14]](s1), [[COPY3]], [[COPY2]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[COPY3]], [[COPY2]] ; GFX10-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32) ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 @@ -1042,14 +1049,14 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT1]](s32), %bb.3 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, 
implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %56(s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc ; GFX10-NEXT: [[DEF6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.2 @@ -1057,27 +1064,27 @@ body: | ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI [[COPY7]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4 ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32(s1) = PHI [[DEF3]](s1), %bb.0, [[PHI7]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32(s1) = PHI [[DEF2]](s1), %bb.0, [[PHI1]](s1), %bb.2, [[DEF5]](s1), %bb.4 ; GFX10-NEXT: [[PHI9:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, [[PHI2]](s1), %bb.2, [[DEF4]](s1), %bb.4 ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4, [[PHI10]](s32), %bb.2, [[C]](s32), %bb.0 ; GFX10-NEXT: [[PHI11:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INTRINSIC_CONVERGENT]](s32), %bb.2, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) - ; GFX10-NEXT: 
[[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[PHI9]](s1) + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1) + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]](s1) + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]](s1) + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[PHI9]](s1) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY20]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY21]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_5]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_6:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY19]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_5]](s1) + ; GFX10-NEXT: [[S_ANDN2_B32_6:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY20]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY22]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_6:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_6]](s1), [[S_AND_B32_6]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_6]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY17]](s1), %bb.2, implicit-def $exec, 
implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[COPY24:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_6]](s1) + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 bb.0: successors: %bb.7(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll index 312c6a3822ce4f..1855ede0483def 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s define void @temporal_divergent_i1_phi(float %val, ptr %addr) { ; GFX10-LABEL: temporal_divergent_i1_phi: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll index b21e6a729dbc22..1934958ea8f37c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s define void @temporal_divergent_i32(float %val, ptr 
%addr) { ; GFX10-LABEL: temporal_divergent_i32: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir index c7d45f062d0d20..4bb9eb807e1568 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -amdgpu-global-isel-risky-select -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GCN +# RUN: llc -mtriple=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GCN --- name: g_phi_s32_ss_sbranch @@ -321,60 +321,6 @@ body: | ... ---- -name: g_phi_vcc_s1_sbranch -legalized: true -regBankSelected: true -tracksRegLiveness: true -machineFunctionInfo: {} -body: | - ; GCN-LABEL: name: g_phi_vcc_s1_sbranch - ; GCN: bb.0: - ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec - ; GCN-NEXT: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc - ; GCN-NEXT: $scc = COPY [[COPY3]] - ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc - ; GCN-NEXT: S_BRANCH %bb.2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.1: - ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[S_MOV_B32_]], implicit $exec - ; GCN-NEXT: S_BRANCH %bb.2 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: bb.2: - ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = 
PHI [[V_CMP_EQ_U32_e64_]], %bb.0, [[V_CMP_EQ_U32_e64_1]], %bb.1 - ; GCN-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]] - bb.0: - liveins: $vgpr0, $vgpr1, $sgpr2 - - %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:sgpr(s32) = COPY $sgpr2 - %3:sgpr(s32) = G_CONSTANT i32 0 - %4:vcc(s1) = G_ICMP intpred(eq), %0, %3 - %5:sgpr(s32) = G_ICMP intpred(eq), %2(s32), %3 - G_BRCOND %5, %bb.1 - G_BR %bb.2 - - bb.1: - %6:vcc(s1) = G_ICMP intpred(eq), %1, %3 - G_BR %bb.2 - - bb.2: - %7:vcc(s1) = G_PHI %4, %bb.0, %6, %bb.1 - S_SETPC_B64 undef $sgpr30_sgpr31, implicit %7 - -... - --- name: phi_s32_ss_sbranch legalized: true diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll index 7c1792e2f101f5..cfdefec04600c8 100644 --- a/llvm/test/CodeGen/RISCV/machine-combiner.ll +++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll @@ -1096,10 +1096,10 @@ declare double @llvm.maxnum.f64(double, double) define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) { ; CHECK_LOCAL-LABEL: test_fmadd_strategy: ; CHECK_LOCAL: # %bb.0: # %entry -; CHECK_LOCAL-NEXT: fmv.d fa5, fa0 ; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1 -; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2 ; CHECK_LOCAL-NEXT: andi a0, a0, 1 +; CHECK_LOCAL-NEXT: fmv.d fa5, fa0 +; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2 ; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2 ; CHECK_LOCAL-NEXT: # %bb.1: # %entry ; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1 @@ -1110,10 +1110,10 @@ define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a ; ; CHECK_GLOBAL-LABEL: test_fmadd_strategy: ; CHECK_GLOBAL: # %bb.0: # %entry -; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0 ; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1 -; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2 ; CHECK_GLOBAL-NEXT: andi a0, a0, 1 +; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0 +; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2 ; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2 ; CHECK_GLOBAL-NEXT: # %bb.1: # %entry ; CHECK_GLOBAL-NEXT: 
fmul.d fa5, fa5, fa1 diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll index 87406f22d169d1..c0c11fefafb555 100644 --- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll +++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll @@ -813,24 +813,24 @@ define i64 @select_sll(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) { ; RV32SFB-NEXT: not a7, a2 ; RV32SFB-NEXT: srli a0, a0, 1 ; RV32SFB-NEXT: sll t0, a1, a2 -; RV32SFB-NEXT: srl a0, a0, a7 ; RV32SFB-NEXT: addi a2, a2, -32 +; RV32SFB-NEXT: srl a0, a0, a7 ; RV32SFB-NEXT: mv a1, a3 -; RV32SFB-NEXT: bgez a2, .LBB20_2 +; RV32SFB-NEXT: bltz a2, .LBB20_2 ; RV32SFB-NEXT: # %bb.1: # %entry -; RV32SFB-NEXT: or a1, t0, a0 +; RV32SFB-NEXT: li a3, 0 ; RV32SFB-NEXT: .LBB20_2: # %entry -; RV32SFB-NEXT: bltz a2, .LBB20_4 +; RV32SFB-NEXT: bgez a2, .LBB20_4 ; RV32SFB-NEXT: # %bb.3: # %entry -; RV32SFB-NEXT: li a3, 0 +; RV32SFB-NEXT: or a1, t0, a0 ; RV32SFB-NEXT: .LBB20_4: # %entry ; RV32SFB-NEXT: beqz a6, .LBB20_6 ; RV32SFB-NEXT: # %bb.5: # %entry -; RV32SFB-NEXT: mv a1, a5 +; RV32SFB-NEXT: mv a3, a4 ; RV32SFB-NEXT: .LBB20_6: # %entry ; RV32SFB-NEXT: beqz a6, .LBB20_8 ; RV32SFB-NEXT: # %bb.7: # %entry -; RV32SFB-NEXT: mv a3, a4 +; RV32SFB-NEXT: mv a1, a5 ; RV32SFB-NEXT: .LBB20_8: # %entry ; RV32SFB-NEXT: mv a0, a3 ; RV32SFB-NEXT: ret @@ -874,24 +874,24 @@ define i64 @select_srl(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) { ; RV32SFB-NEXT: not a7, a2 ; RV32SFB-NEXT: slli a1, a1, 1 ; RV32SFB-NEXT: srl t0, a0, a2 -; RV32SFB-NEXT: sll a1, a1, a7 ; RV32SFB-NEXT: addi a2, a2, -32 +; RV32SFB-NEXT: sll a1, a1, a7 ; RV32SFB-NEXT: mv a0, a3 -; RV32SFB-NEXT: bgez a2, .LBB21_2 +; RV32SFB-NEXT: bltz a2, .LBB21_2 ; RV32SFB-NEXT: # %bb.1: # %entry -; RV32SFB-NEXT: or a0, t0, a1 +; RV32SFB-NEXT: li a3, 0 ; RV32SFB-NEXT: .LBB21_2: # %entry -; RV32SFB-NEXT: bltz a2, .LBB21_4 +; RV32SFB-NEXT: bgez a2, .LBB21_4 ; RV32SFB-NEXT: # %bb.3: # %entry -; RV32SFB-NEXT: li a3, 0 +; RV32SFB-NEXT: or 
a0, t0, a1 ; RV32SFB-NEXT: .LBB21_4: # %entry ; RV32SFB-NEXT: beqz a6, .LBB21_6 ; RV32SFB-NEXT: # %bb.5: # %entry -; RV32SFB-NEXT: mv a0, a4 +; RV32SFB-NEXT: mv a3, a5 ; RV32SFB-NEXT: .LBB21_6: # %entry ; RV32SFB-NEXT: beqz a6, .LBB21_8 ; RV32SFB-NEXT: # %bb.7: # %entry -; RV32SFB-NEXT: mv a3, a5 +; RV32SFB-NEXT: mv a0, a4 ; RV32SFB-NEXT: .LBB21_8: # %entry ; RV32SFB-NEXT: mv a1, a3 ; RV32SFB-NEXT: ret @@ -935,24 +935,24 @@ define i64 @select_sra(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) { ; RV32SFB-NEXT: not a7, a2 ; RV32SFB-NEXT: slli t0, a1, 1 ; RV32SFB-NEXT: srl t1, a0, a2 -; RV32SFB-NEXT: sll a7, t0, a7 ; RV32SFB-NEXT: addi a2, a2, -32 +; RV32SFB-NEXT: sll a7, t0, a7 ; RV32SFB-NEXT: mv a0, a3 -; RV32SFB-NEXT: bgez a2, .LBB22_2 +; RV32SFB-NEXT: bltz a2, .LBB22_2 ; RV32SFB-NEXT: # %bb.1: # %entry -; RV32SFB-NEXT: or a0, t1, a7 +; RV32SFB-NEXT: srai a3, a1, 31 ; RV32SFB-NEXT: .LBB22_2: # %entry -; RV32SFB-NEXT: bltz a2, .LBB22_4 +; RV32SFB-NEXT: bgez a2, .LBB22_4 ; RV32SFB-NEXT: # %bb.3: # %entry -; RV32SFB-NEXT: srai a3, a1, 31 +; RV32SFB-NEXT: or a0, t1, a7 ; RV32SFB-NEXT: .LBB22_4: # %entry ; RV32SFB-NEXT: beqz a6, .LBB22_6 ; RV32SFB-NEXT: # %bb.5: # %entry -; RV32SFB-NEXT: mv a0, a4 +; RV32SFB-NEXT: mv a3, a5 ; RV32SFB-NEXT: .LBB22_6: # %entry ; RV32SFB-NEXT: beqz a6, .LBB22_8 ; RV32SFB-NEXT: # %bb.7: # %entry -; RV32SFB-NEXT: mv a3, a5 +; RV32SFB-NEXT: mv a0, a4 ; RV32SFB-NEXT: .LBB22_8: # %entry ; RV32SFB-NEXT: mv a1, a3 ; RV32SFB-NEXT: ret @@ -1088,11 +1088,11 @@ define i64 @select_andi(i64 %A, i64 %C, i1 zeroext %cond) { ; RV32SFB-NEXT: # %bb.1: # %entry ; RV32SFB-NEXT: andi a2, a0, 567 ; RV32SFB-NEXT: .LBB25_2: # %entry +; RV32SFB-NEXT: mv a0, a2 ; RV32SFB-NEXT: bnez a4, .LBB25_4 ; RV32SFB-NEXT: # %bb.3: # %entry ; RV32SFB-NEXT: li a1, 0 ; RV32SFB-NEXT: .LBB25_4: # %entry -; RV32SFB-NEXT: mv a0, a2 ; RV32SFB-NEXT: ret entry: %0 = and i64 %A, 567 @@ -1130,13 +1130,13 @@ define i64 @select_ori(i64 %A, i64 %C, i1 zeroext %cond) { ; ; RV32SFB-LABEL: 
select_ori: ; RV32SFB: # %bb.0: # %entry -; RV32SFB-NEXT: beqz a4, .LBB26_2 +; RV32SFB-NEXT: bnez a4, .LBB26_2 ; RV32SFB-NEXT: # %bb.1: # %entry -; RV32SFB-NEXT: mv a1, a3 +; RV32SFB-NEXT: ori a2, a0, 890 ; RV32SFB-NEXT: .LBB26_2: # %entry -; RV32SFB-NEXT: bnez a4, .LBB26_4 +; RV32SFB-NEXT: beqz a4, .LBB26_4 ; RV32SFB-NEXT: # %bb.3: # %entry -; RV32SFB-NEXT: ori a2, a0, 890 +; RV32SFB-NEXT: mv a1, a3 ; RV32SFB-NEXT: .LBB26_4: # %entry ; RV32SFB-NEXT: mv a0, a2 ; RV32SFB-NEXT: ret @@ -1176,13 +1176,13 @@ define i64 @select_xori(i64 %A, i64 %C, i1 zeroext %cond) { ; ; RV32SFB-LABEL: select_xori: ; RV32SFB: # %bb.0: # %entry -; RV32SFB-NEXT: beqz a4, .LBB27_2 +; RV32SFB-NEXT: bnez a4, .LBB27_2 ; RV32SFB-NEXT: # %bb.1: # %entry -; RV32SFB-NEXT: mv a1, a3 +; RV32SFB-NEXT: xori a2, a0, 321 ; RV32SFB-NEXT: .LBB27_2: # %entry -; RV32SFB-NEXT: bnez a4, .LBB27_4 +; RV32SFB-NEXT: beqz a4, .LBB27_4 ; RV32SFB-NEXT: # %bb.3: # %entry -; RV32SFB-NEXT: xori a2, a0, 321 +; RV32SFB-NEXT: mv a1, a3 ; RV32SFB-NEXT: .LBB27_4: # %entry ; RV32SFB-NEXT: mv a0, a2 ; RV32SFB-NEXT: ret @@ -1272,11 +1272,11 @@ define i64 @select_srli(i64 %A, i64 %C, i1 zeroext %cond) { ; RV32SFB-NEXT: mv a0, a2 ; RV32SFB-NEXT: bnez a4, .LBB29_2 ; RV32SFB-NEXT: # %bb.1: # %entry -; RV32SFB-NEXT: srli a0, a1, 3 +; RV32SFB-NEXT: li a3, 0 ; RV32SFB-NEXT: .LBB29_2: # %entry ; RV32SFB-NEXT: bnez a4, .LBB29_4 ; RV32SFB-NEXT: # %bb.3: # %entry -; RV32SFB-NEXT: li a3, 0 +; RV32SFB-NEXT: srli a0, a1, 3 ; RV32SFB-NEXT: .LBB29_4: # %entry ; RV32SFB-NEXT: mv a1, a3 ; RV32SFB-NEXT: ret diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 22aae4de4db9d2..3e40bfa1e791d0 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -2171,14 +2171,13 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind { ; KNL-LABEL: test_concat_v2i1: ; KNL: ## %bb.0: ; KNL-NEXT: vmovq {{.*#+}} xmm0 = 
mem[0],zero -; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 +; KNL-NEXT: vcvtph2ps %xmm0, %xmm1 ; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0] ; KNL-NEXT: vucomiss %xmm2, %xmm1 ; KNL-NEXT: setb %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ; KNL-NEXT: vucomiss %xmm2, %xmm0 ; KNL-NEXT: setb %al @@ -2207,14 +2206,13 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind { ; SKX-LABEL: test_concat_v2i1: ; SKX: ## %bb.0: ; SKX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; SKX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7] ; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 ; SKX-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0] ; SKX-NEXT: vucomiss %xmm2, %xmm1 ; SKX-NEXT: setb %al ; SKX-NEXT: kmovd %eax, %k0 ; SKX-NEXT: kshiftlb $1, %k0, %k0 -; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ; SKX-NEXT: vucomiss %xmm2, %xmm0 ; SKX-NEXT: setb %al diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index b2c54160a0eae2..03cea46f122350 100644 --- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -1436,9 +1436,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; KNL: ## %bb.0: ## %entry ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07] -; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; KNL-NEXT: ## encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A] -; KNL-NEXT: ## fixup A - offset: 5, 
value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55] +; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7] ; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9] ; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2] @@ -1448,8 +1447,6 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; KNL-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00] ; KNL-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1] ; KNL-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1] -; KNL-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0] -; KNL-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; KNL-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2] ; KNL-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1] @@ -1466,9 +1463,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; AVX512BW: ## %bb.0: ## %entry ; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07] -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX512BW-NEXT: ## encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A] -; AVX512BW-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55] +; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7] ; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9] ; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2] @@ -1478,8 +1474,6 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; 
AVX512BW-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00] ; AVX512BW-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1] ; AVX512BW-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1] -; AVX512BW-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0] -; AVX512BW-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; AVX512BW-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2] ; AVX512BW-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1] @@ -1496,9 +1490,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; SKX: ## %bb.0: ## %entry ; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07] -; SKX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A] -; SKX-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; SKX-NEXT: vpshuflw $85, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc8,0x55] +; SKX-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7] ; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc9] ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2] ; SKX-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca] @@ -1507,8 +1500,6 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) { ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] ; SKX-NEXT: testb %cl, %cl ## encoding: [0x84,0xc9] ; SKX-NEXT: setne %al ## encoding: [0x0f,0x95,0xc0] -; SKX-NEXT: vpmovzxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0] -; SKX-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 
## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; SKX-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2] ; SKX-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1] diff --git a/llvm/test/CodeGen/X86/cmov-fp.ll b/llvm/test/CodeGen/X86/cmov-fp.ll index 26e720ffcebccd..77665d083b7e3e 100644 --- a/llvm/test/CodeGen/X86/cmov-fp.ll +++ b/llvm/test/CodeGen/X86/cmov-fp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-- -mcpu pentium4 < %s | FileCheck %s -check-prefix=SSE -; RUN: llc -mtriple=i686-- -mcpu pentium3 < %s | FileCheck %s -check-prefix=NOSSE2 -; RUN: llc -mtriple=i686-- -mcpu pentium2 < %s | FileCheck %s -check-prefix=NOSSE1 +; RUN: llc -mtriple=i686-- -mcpu pentium3 < %s | FileCheck %s -check-prefixes=NOSSE,NOSSE2 +; RUN: llc -mtriple=i686-- -mcpu pentium2 < %s | FileCheck %s -check-prefixes=NOSSE,NOSSE1 ; RUN: llc -mtriple=i686-- -mcpu pentium < %s | FileCheck %s -check-prefix=NOCMOV ; PR14035 @@ -27,27 +27,16 @@ define double @test1(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test1: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovnbe %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test1: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovnbe %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test1: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds 
{{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovnbe %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test1: ; NOCMOV: # %bb.0: @@ -90,27 +79,16 @@ define double @test2(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test2: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovnb %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test2: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovnb %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test2: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovnb %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test2: ; NOCMOV: # %bb.0: @@ -153,27 +131,16 @@ define double @test3(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test3: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovb %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test3: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; 
NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovb %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test3: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovb %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test3: ; NOCMOV: # %bb.0: @@ -216,27 +183,16 @@ define double @test4(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test4: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovbe %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test4: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovbe %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test4: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovbe %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test4: ; NOCMOV: # %bb.0: @@ -279,31 +235,18 @@ define double @test5(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test5: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setg %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds 
{{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test5: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setg %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test5: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setg %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test5: ; NOCMOV: # %bb.0: @@ -346,31 +289,18 @@ define double @test6(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test6: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setge %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test6: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setge %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test6: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), 
%eax +; NOSSE-NEXT: setge %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test6: ; NOCMOV: # %bb.0: @@ -413,31 +343,18 @@ define double @test7(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test7: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setl %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test7: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setl %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test7: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setl %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test7: ; NOCMOV: # %bb.0: @@ -480,31 +397,18 @@ define double @test8(i32 %a, i32 %b, double %x) nounwind { ; SSE-NEXT: popl %ebp ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test8: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setle %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; 
NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test8: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldl {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setle %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test8: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setle %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test8: ; NOCMOV: # %bb.0: @@ -1065,27 +969,16 @@ define x86_fp80 @test17(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test17: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovnbe %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test17: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovnbe %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test17: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovnbe %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: 
retl ; ; NOCMOV-LABEL: test17: ; NOCMOV: # %bb.0: @@ -1118,27 +1011,16 @@ define x86_fp80 @test18(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test18: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovnb %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test18: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovnb %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test18: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovnb %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test18: ; NOCMOV: # %bb.0: @@ -1171,27 +1053,16 @@ define x86_fp80 @test19(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test19: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovb %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test19: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovb %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: 
retl +; NOSSE-LABEL: test19: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovb %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test19: ; NOCMOV: # %bb.0: @@ -1224,27 +1095,16 @@ define x86_fp80 @test20(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test20: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovbe %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test20: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovbe %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test20: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovbe %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test20: ; NOCMOV: # %bb.0: @@ -1279,31 +1139,18 @@ define x86_fp80 @test21(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test21: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setg %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; 
NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test21: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setg %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test21: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setg %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test21: ; NOCMOV: # %bb.0: @@ -1339,31 +1186,18 @@ define x86_fp80 @test22(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test22: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setge %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test22: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setge %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test22: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setge %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: 
flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test22: ; NOCMOV: # %bb.0: @@ -1398,31 +1232,18 @@ define x86_fp80 @test23(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test23: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setl %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp %st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test23: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setl %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test23: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setl %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test23: ; NOCMOV: # %bb.0: @@ -1457,31 +1278,18 @@ define x86_fp80 @test24(i32 %a, i32 %b, x86_fp80 %x) nounwind { ; SSE-NEXT: fstp %st(1) ; SSE-NEXT: retl ; -; NOSSE2-LABEL: test24: -; NOSSE2: # %bb.0: -; NOSSE2-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE2-NEXT: setle %al -; NOSSE2-NEXT: testb %al, %al -; NOSSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE2-NEXT: fxch %st(1) -; NOSSE2-NEXT: fcmovne %st(1), %st -; NOSSE2-NEXT: fstp 
%st(1) -; NOSSE2-NEXT: retl -; -; NOSSE1-LABEL: test24: -; NOSSE1: # %bb.0: -; NOSSE1-NEXT: fldt {{[0-9]+}}(%esp) -; NOSSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; NOSSE1-NEXT: setle %al -; NOSSE1-NEXT: testb %al, %al -; NOSSE1-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; NOSSE1-NEXT: fxch %st(1) -; NOSSE1-NEXT: fcmovne %st(1), %st -; NOSSE1-NEXT: fstp %st(1) -; NOSSE1-NEXT: retl +; NOSSE-LABEL: test24: +; NOSSE: # %bb.0: +; NOSSE-NEXT: fldt {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: setle %al +; NOSSE-NEXT: testb %al, %al +; NOSSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; NOSSE-NEXT: fxch %st(1) +; NOSSE-NEXT: fcmovne %st(1), %st +; NOSSE-NEXT: fstp %st(1) +; NOSSE-NEXT: retl ; ; NOCMOV-LABEL: test24: ; NOCMOV: # %bb.0: diff --git a/llvm/test/CodeGen/X86/cvt16.ll b/llvm/test/CodeGen/X86/cvt16.ll index 59097f8fb5d247..c7ef353f7f6038 100644 --- a/llvm/test/CodeGen/X86/cvt16.ll +++ b/llvm/test/CodeGen/X86/cvt16.ll @@ -89,7 +89,6 @@ define float @test3(float %src) nounwind uwtable readnone { ; F16C-LABEL: test3: ; F16C: # %bb.0: ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/f16c-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/f16c-intrinsics-fast-isel.ll index e114c205d7972b..1886e2911ede80 100644 --- a/llvm/test/CodeGen/X86/f16c-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/f16c-intrinsics-fast-isel.ll @@ -18,8 +18,7 @@ define float @test_cvtsh_ss(i16 %a0) nounwind { ; ; X64-LABEL: test_cvtsh_ss: ; X64: # %bb.0: -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: vmovd %eax, %xmm0 +; X64-NEXT: vmovd %edi, %xmm0 ; X64-NEXT: vcvtph2ps %xmm0, %xmm0 ; X64-NEXT: retq %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 @@ -41,8 +40,6 @@ define i16 @test_cvtss_sh(float %a0) nounwind { ; X86-LABEL: 
test_cvtss_sh: ; X86: # %bb.0: ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X86-NEXT: vcvtps2ph $0, %xmm0, %xmm0 ; X86-NEXT: vmovd %xmm0, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -50,8 +47,6 @@ define i16 @test_cvtss_sh(float %a0) nounwind { ; ; X64-LABEL: test_cvtss_sh: ; X64: # %bb.0: -; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; X64-NEXT: vcvtps2ph $0, %xmm0, %xmm0 ; X64-NEXT: vmovd %xmm0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 5f326b6d6998fb..8f875c70a25f6d 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -1432,7 +1432,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind { ; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax ; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-NO-FASTFMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0 @@ -1447,7 +1446,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind { ; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax ; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0 ; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-FMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0 @@ -1550,7 +1548,6 @@ define half 
@fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind { ; CHECK-NO-FASTFMA-NEXT: movzwl %ax, %eax ; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-NO-FASTFMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0 @@ -1566,7 +1563,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind { ; CHECK-FMA-NEXT: movzwl %ax, %eax ; CHECK-FMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-FMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] ; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/fp-roundeven.ll b/llvm/test/CodeGen/X86/fp-roundeven.ll index fed2060dabd3af..8037c783dd8e67 100644 --- a/llvm/test/CodeGen/X86/fp-roundeven.ll +++ b/llvm/test/CodeGen/X86/fp-roundeven.ll @@ -51,7 +51,6 @@ define half @roundeven_f16(half %h) { ; AVX512F-LABEL: roundeven_f16: ; AVX512F: ## %bb.0: ## %entry ; AVX512F-NEXT: vpextrw $0, %xmm0, %eax -; AVX512F-NEXT: movzwl %ax, %eax ; AVX512F-NEXT: vmovd %eax, %xmm0 ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512F-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index a3fb71f817ce47..6aad4c2ebba1d8 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -698,24 +698,23 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind { ; ; AVX2-LABEL: stest_f16i32: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 +; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; 
AVX2-NEXT: vcvttss2si %xmm1, %rax -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, %rcx -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1 ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm1, %rax ; AVX2-NEXT: vmovq %rcx, %xmm1 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm0, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 @@ -837,7 +836,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; ; AVX2-LABEL: utesth_f16i32: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 +; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 @@ -846,29 +845,28 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; AVX2-NEXT: movq %rcx, %rdx ; AVX2-NEXT: sarq $63, %rdx ; AVX2-NEXT: andq %rax, %rdx -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX2-NEXT: vsubss %xmm1, 
%xmm2, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm3, %rax -; AVX2-NEXT: vmovq %rdx, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx +; AVX2-NEXT: vmovq %rdx, %xmm2 ; AVX2-NEXT: movq %rcx, %rdx ; AVX2-NEXT: sarq $63, %rdx -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3 ; AVX2-NEXT: andq %rax, %rdx -; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4 ; AVX2-NEXT: vcvttss2si %xmm4, %rax ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm4 -; AVX2-NEXT: vcvttss2si %xmm2, %rcx -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm3[0] +; AVX2-NEXT: vcvttss2si %xmm3, %rcx +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] ; AVX2-NEXT: movq %rcx, %rdx ; AVX2-NEXT: sarq $63, %rdx ; AVX2-NEXT: andq %rax, %rdx ; AVX2-NEXT: orq %rcx, %rdx -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, %rax @@ -879,7 +877,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind { ; AVX2-NEXT: andq %rax, %rdx ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm1 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295] ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] @@ -1001,24 +999,23 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind { ; ; AVX2-LABEL: ustest_f16i32: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 +; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, 
%rax ; AVX2-NEXT: vmovq %rax, %xmm1 -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm2, %rax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm2, %rax ; AVX2-NEXT: vmovq %rax, %xmm2 -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX2-NEXT: vcvttss2si %xmm0, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 @@ -3313,24 +3310,23 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind { ; ; AVX2-LABEL: stest_f16i32_mm: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 +; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, %rax -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, %rcx -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1 ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm1, %rax ; AVX2-NEXT: vmovq %rcx, %xmm1 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = 
xmm1[0],xmm2[0] -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm0, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 @@ -3450,7 +3446,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; ; AVX2-LABEL: utesth_f16i32_mm: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 +; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2 ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 @@ -3459,29 +3455,28 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; AVX2-NEXT: movq %rcx, %rdx ; AVX2-NEXT: sarq $63, %rdx ; AVX2-NEXT: andq %rax, %rdx -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm3, %rax -; AVX2-NEXT: vmovq %rdx, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm2, %rcx +; AVX2-NEXT: vmovq %rdx, %xmm2 ; AVX2-NEXT: movq %rcx, %rdx ; AVX2-NEXT: sarq $63, %rdx -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3 ; AVX2-NEXT: andq %rax, %rdx -; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4 ; AVX2-NEXT: vcvttss2si %xmm4, %rax ; AVX2-NEXT: orq %rcx, %rdx ; 
AVX2-NEXT: vmovq %rdx, %xmm4 -; AVX2-NEXT: vcvttss2si %xmm2, %rcx -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm3[0] +; AVX2-NEXT: vcvttss2si %xmm3, %rcx +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0] ; AVX2-NEXT: movq %rcx, %rdx ; AVX2-NEXT: sarq $63, %rdx ; AVX2-NEXT: andq %rax, %rdx ; AVX2-NEXT: orq %rcx, %rdx -; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, %rax @@ -3492,7 +3487,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind { ; AVX2-NEXT: andq %rax, %rdx ; AVX2-NEXT: orq %rcx, %rdx ; AVX2-NEXT: vmovq %rdx, %xmm1 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm1 @@ -3613,24 +3608,23 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind { ; ; AVX2-LABEL: ustest_f16i32_mm: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1 +; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvttss2si %xmm1, %rax ; AVX2-NEXT: vmovq %rax, %xmm1 -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm2, %rax ; AVX2-NEXT: vmovq %rax, %xmm2 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] -; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2 ; AVX2-NEXT: vcvttss2si %xmm2, %rax ; AVX2-NEXT: vmovq %rax, %xmm2 -; 
AVX2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX2-NEXT: vcvttss2si %xmm0, %rax ; AVX2-NEXT: vmovq %rax, %xmm0 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295] ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 2e1322446032ff..9f01d07e6a6705 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -851,16 +851,14 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 { ; ; BWON-F16C-LABEL: test_sitofp_fadd_i32: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: movzwl (%rsi), %eax ; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; BWON-F16C-NEXT: movzwl (%rsi), %eax ; BWON-F16C-NEXT: vmovd %eax, %xmm1 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 ; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: retq ; @@ -919,7 +917,6 @@ define half @PR40273(half) #0 { ; BWON-F16C-LABEL: PR40273: ; BWON-F16C: # %bb.0: ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: movzwl %ax, %eax ; BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: xorl %eax, %eax @@ -973,7 +970,6 @@ define void @brcond(half %0) #0 { ; BWON-F16C-LABEL: brcond: ; BWON-F16C: # %bb.0: # %entry ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: movzwl %ax, %eax ; 
BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -1029,7 +1025,6 @@ define half @test_sqrt(half %0) #0 { ; BWON-F16C-LABEL: test_sqrt: ; BWON-F16C: # %bb.0: # %entry ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax -; BWON-F16C-NEXT: movzwl %ax, %eax ; BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 @@ -1083,7 +1078,6 @@ define void @main.158() #0 { ; BWON-F16C: # %bb.0: # %entry ; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1 -; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0] ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2 @@ -1172,8 +1166,7 @@ define void @main.45() #0 { ; ; BWON-F16C-LABEL: main.45: ; BWON-F16C: # %bb.0: # %entry -; BWON-F16C-NEXT: movzwl (%rax), %eax -; BWON-F16C-NEXT: vmovd %eax, %xmm0 +; BWON-F16C-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: xorl %eax, %eax @@ -1345,10 +1338,8 @@ define half @pr61271(half %0, half %1) #0 { ; BWON-F16C: # %bb.0: ; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax ; BWON-F16C-NEXT: vpextrw $0, %xmm1, %ecx -; BWON-F16C-NEXT: movzwl %cx, %ecx ; BWON-F16C-NEXT: vmovd %ecx, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; BWON-F16C-NEXT: movzwl %ax, %eax ; BWON-F16C-NEXT: vmovd %eax, %xmm1 ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 ; BWON-F16C-NEXT: vminss %xmm0, %xmm1, %xmm0 @@ -1614,10 +1605,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; ; BWON-F16C-LABEL: maxnum_v8f16: ; BWON-F16C: # %bb.0: -; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm3 = [10,11,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; BWON-F16C-NEXT: vpshufb %xmm3, %xmm1, %xmm2 +; 
BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 -; BWON-F16C-NEXT: vpshufb %xmm3, %xmm0, %xmm3 +; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 ; BWON-F16C-NEXT: vucomiss %xmm2, %xmm3 ; BWON-F16C-NEXT: ja .LBB26_2 @@ -1625,10 +1615,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; BWON-F16C-NEXT: vmovaps %xmm2, %xmm3 ; BWON-F16C-NEXT: .LBB26_2: ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm2 -; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; BWON-F16C-NEXT: vpshufb %xmm4, %xmm1, %xmm3 +; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 -; BWON-F16C-NEXT: vpshufb %xmm4, %xmm0, %xmm4 +; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[3,3,3,3] ; BWON-F16C-NEXT: vcvtph2ps %xmm4, %xmm4 ; BWON-F16C-NEXT: vucomiss %xmm3, %xmm4 ; BWON-F16C-NEXT: ja .LBB26_4 @@ -1638,10 +1627,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; BWON-F16C-NEXT: vmovd %xmm2, %eax ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm4, %xmm2 ; BWON-F16C-NEXT: vmovd %xmm2, %ecx -; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; BWON-F16C-NEXT: vpshufb %xmm2, %xmm1, %xmm3 -; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 -; BWON-F16C-NEXT: vpshufb %xmm2, %xmm0, %xmm2 +; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3 +; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 ; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2 ; BWON-F16C-NEXT: ja .LBB26_6 @@ -1650,9 +1638,9 @@ define <8 x half> 
@maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; BWON-F16C-NEXT: .LBB26_6: ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2 ; BWON-F16C-NEXT: vmovd %xmm2, %edx -; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm3 -; BWON-F16C-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; BWON-F16C-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 ; BWON-F16C-NEXT: vucomiss %xmm3, %xmm2 ; BWON-F16C-NEXT: ja .LBB26_8 @@ -1661,10 +1649,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; BWON-F16C-NEXT: .LBB26_8: ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2 ; BWON-F16C-NEXT: vmovd %xmm2, %esi -; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm3 = [4,5,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; BWON-F16C-NEXT: vpshufb %xmm3, %xmm1, %xmm2 +; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[3,3,3,3,4,5,6,7] ; BWON-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 -; BWON-F16C-NEXT: vpshufb %xmm3, %xmm0, %xmm3 +; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[3,3,3,3,4,5,6,7] ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm6 ; BWON-F16C-NEXT: vucomiss %xmm2, %xmm6 ; BWON-F16C-NEXT: ja .LBB26_10 @@ -1677,9 +1664,9 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; BWON-F16C-NEXT: vpinsrw $0, %esi, %xmm0, %xmm5 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm6 ; BWON-F16C-NEXT: vmovd %xmm6, %eax -; BWON-F16C-NEXT: vpsrlq $48, %xmm1, %xmm6 +; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3] ; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm7 -; BWON-F16C-NEXT: vpsrlq $48, %xmm0, %xmm6 +; BWON-F16C-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3] ; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6 ; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6 ; BWON-F16C-NEXT: ja .LBB26_12 @@ -1687,29 +1674,26 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, 
<8 x half> %1) #0 { ; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6 ; BWON-F16C-NEXT: .LBB26_12: ; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm5 ; BWON-F16C-NEXT: vmovd %xmm5, %eax ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 -; BWON-F16C-NEXT: vmovdqa {{.*#+}} xmm6 = [2,3,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; BWON-F16C-NEXT: vpshufb %xmm6, %xmm1, %xmm7 -; BWON-F16C-NEXT: vcvtph2ps %xmm7, %xmm7 -; BWON-F16C-NEXT: vpshufb %xmm6, %xmm0, %xmm6 -; BWON-F16C-NEXT: vcvtph2ps %xmm6, %xmm6 +; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm7 +; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm6 ; BWON-F16C-NEXT: vucomiss %xmm7, %xmm6 ; BWON-F16C-NEXT: ja .LBB26_14 ; BWON-F16C-NEXT: # %bb.13: ; BWON-F16C-NEXT: vmovaps %xmm7, %xmm6 ; BWON-F16C-NEXT: .LBB26_14: -; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm6, %xmm4 ; BWON-F16C-NEXT: vmovd %xmm4, %eax ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 -; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7] ; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7] ; 
BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 ; BWON-F16C-NEXT: ja .LBB26_16 @@ -1719,7 +1703,7 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 { ; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; BWON-F16C-NEXT: vmovd %xmm0, %eax ; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 -; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] ; BWON-F16C-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] ; BWON-F16C-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; BWON-F16C-NEXT: retq diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll index 5fde9bd5566b40..9946267b48e7ff 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -635,7 +635,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) { ; X64-AVX2-LABEL: test_reduce_v4i64: ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll index 699dce75e505c7..0bbf94f1817f51 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -581,7 +581,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) { ; X64-AVX2-LABEL: test_reduce_v4i64: ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 diff --git a/llvm/test/CodeGen/X86/inline-asm-memop.ll b/llvm/test/CodeGen/X86/inline-asm-memop.ll new file mode 100644 index 00000000000000..83442498076102 --- /dev/null +++ b/llvm/test/CodeGen/X86/inline-asm-memop.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O0 < %s | FileCheck %s + +; A bug in X86DAGToDAGISel::matchAddressRecursively creates a zext SDValue which +; is quickly replaced by another SDValue but was already pushed into a vector for the later +; call to the SelectionDAGISel::Select_INLINEASM getNode builder; see issue +; 82431 for more information. + +define void @PR82431(i8 %call, ptr %b) { +; CHECK-LABEL: PR82431: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb %dil, %al +; CHECK-NEXT: addb $1, %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: # kill: def $rax killed $eax +; CHECK-NEXT: shlq $3, %rax +; CHECK-NEXT: addq %rax, %rsi +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: retq +entry: + %narrow = add nuw i8 %call, 1 + %idxprom = zext i8 %narrow to i64 + %arrayidx = getelementptr [1 x i64], ptr %b, i64 0, i64 %idxprom + tail call void asm "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %arrayidx, ptr elementtype(i64) %arrayidx) + ret void +} diff --git a/llvm/test/CodeGen/X86/pr31088.ll b/llvm/test/CodeGen/X86/pr31088.ll index a21653bc7330c9..ce37622c476db4 100644 --- a/llvm/test/CodeGen/X86/pr31088.ll +++ b/llvm/test/CodeGen/X86/pr31088.ll @@ -41,9 +41,7 @@ define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind { ; ; F16C-LABEL: ir_fadd_v1f16: ; F16C: # %bb.0: -; F16C-NEXT: vpmovzxwq {{.*#+}} 
xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -54,13 +52,15 @@ define <1 x half> @ir_fadd_v1f16(<1 x half> %arg0, <1 x half> %arg1) nounwind { ; F16C-O0-LABEL: ir_fadd_v1f16: ; F16C-O0: # %bb.0: ; F16C-O0-NEXT: vpextrw $0, %xmm1, %eax -; F16C-O0-NEXT: # kill: def $ax killed $ax killed $eax -; F16C-O0-NEXT: movzwl %ax, %eax +; F16C-O0-NEXT: movw %ax, %cx +; F16C-O0-NEXT: # implicit-def: $eax +; F16C-O0-NEXT: movw %cx, %ax ; F16C-O0-NEXT: vmovd %eax, %xmm1 ; F16C-O0-NEXT: vcvtph2ps %xmm1, %xmm1 ; F16C-O0-NEXT: vpextrw $0, %xmm0, %eax -; F16C-O0-NEXT: # kill: def $ax killed $ax killed $eax -; F16C-O0-NEXT: movzwl %ax, %eax +; F16C-O0-NEXT: movw %ax, %cx +; F16C-O0-NEXT: # implicit-def: $eax +; F16C-O0-NEXT: movw %cx, %ax ; F16C-O0-NEXT: vmovd %eax, %xmm0 ; F16C-O0-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-O0-NEXT: vaddss %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr57340.ll b/llvm/test/CodeGen/X86/pr57340.ll index 95f839c338e701..00a52c639e43c6 100644 --- a/llvm/test/CodeGen/X86/pr57340.ll +++ b/llvm/test/CodeGen/X86/pr57340.ll @@ -5,29 +5,28 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-LABEL: main.41: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vpbroadcastw (%rax), %xmm0 -; CHECK-NEXT: vmovdqu (%rax), %ymm1 -; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 -; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm3 = [31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] -; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm3 -; CHECK-NEXT: vmovdqu (%rax), %xmm10 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,3,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; CHECK-NEXT: vpshufb %xmm1, %xmm10, %xmm2 -; CHECK-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-NEXT: vpshufb %xmm1, %xmm3, %xmm4 -; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4 -; CHECK-NEXT: vucomiss %xmm4, 
%xmm2 -; CHECK-NEXT: setnp %al -; CHECK-NEXT: sete %cl -; CHECK-NEXT: testb %al, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: kmovd %eax, %k0 +; CHECK-NEXT: vpextrw $0, %xmm0, %eax +; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 +; CHECK-NEXT: vmovdqu (%rax), %ymm3 +; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm2 = [31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14] +; CHECK-NEXT: vpermi2w %ymm1, %ymm3, %ymm2 +; CHECK-NEXT: vprold $16, %xmm2, %xmm1 +; CHECK-NEXT: vcvtph2ps %xmm1, %xmm3 +; CHECK-NEXT: vmovdqu (%rax), %xmm5 +; CHECK-NEXT: vprold $16, %xmm5, %xmm1 +; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 +; CHECK-NEXT: vucomiss %xmm3, %xmm1 +; CHECK-NEXT: setnp %cl +; CHECK-NEXT: sete %dl +; CHECK-NEXT: testb %cl, %dl +; CHECK-NEXT: setne %cl +; CHECK-NEXT: kmovd %ecx, %k0 ; CHECK-NEXT: kshiftlw $15, %k0, %k0 +; CHECK-NEXT: vmovd %eax, %xmm3 +; CHECK-NEXT: vcvtph2ps %xmm3, %xmm3 +; CHECK-NEXT: vcvtph2ps %xmm5, %xmm6 ; CHECK-NEXT: kshiftrw $14, %k0, %k0 -; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm10[0],zero,zero,zero,xmm10[1],zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm4, %xmm11 -; CHECK-NEXT: vucomiss %xmm0, %xmm11 +; CHECK-NEXT: vucomiss %xmm3, %xmm6 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -38,10 +37,10 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-5, %ax ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = [4,5,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm5 -; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5 -; CHECK-NEXT: vucomiss %xmm5, %xmm0 +; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] +; CHECK-NEXT: vcvtph2ps %xmm3, %xmm3 +; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-NEXT: vucomiss %xmm3, %xmm0 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -52,12 +51,12 @@ define void 
@main.41() local_unnamed_addr #1 { ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: movw $-9, %ax ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpsrlq $48, %xmm3, %xmm5 -; CHECK-NEXT: vcvtph2ps %xmm5, %xmm6 -; CHECK-NEXT: vpsrlq $48, %xmm10, %xmm5 -; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm6, %xmm5 +; CHECK-NEXT: vprolq $16, %xmm2, %xmm3 +; CHECK-NEXT: vcvtph2ps %xmm3, %xmm4 +; CHECK-NEXT: vprolq $16, %xmm5, %xmm3 +; CHECK-NEXT: vcvtph2ps %xmm3, %xmm3 +; CHECK-NEXT: vucomiss %xmm4, %xmm3 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -68,11 +67,10 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: movw $-17, %ax ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm6 = [8,9,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufb %xmm6, %xmm3, %xmm7 -; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7 -; CHECK-NEXT: vucomiss %xmm7, %xmm0 +; CHECK-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] +; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4 +; CHECK-NEXT: vucomiss %xmm4, %xmm0 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -83,13 +81,12 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: movw $-33, %ax ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm7 = [10,11,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; CHECK-NEXT: vpshufb %xmm7, %xmm10, %xmm8 -; CHECK-NEXT: vcvtph2ps %xmm8, %xmm8 +; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vcvtph2ps %xmm4, %xmm7 +; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm5[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vcvtph2ps %xmm4, %xmm4 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufb %xmm7, %xmm3, %xmm9 -; CHECK-NEXT: vcvtph2ps %xmm9, %xmm9 
-; CHECK-NEXT: vucomiss %xmm9, %xmm8 +; CHECK-NEXT: vucomiss %xmm7, %xmm4 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -100,11 +97,10 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: movw $-65, %ax ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa {{.*#+}} xmm9 = [12,13,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; CHECK-NEXT: vpshufb %xmm9, %xmm3, %xmm12 -; CHECK-NEXT: vcvtph2ps %xmm12, %xmm12 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm12, %xmm0 +; CHECK-NEXT: vshufps {{.*#+}} xmm7 = xmm2[3,3,3,3] +; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7 +; CHECK-NEXT: vucomiss %xmm7, %xmm0 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -116,11 +112,11 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-129, %ax ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm12 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm12, %xmm12 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm10 = xmm10[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm10, %xmm10 -; CHECK-NEXT: vucomiss %xmm12, %xmm10 +; CHECK-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vcvtph2ps %xmm7, %xmm7 +; CHECK-NEXT: vpsrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vcvtph2ps %xmm5, %xmm5 +; CHECK-NEXT: vucomiss %xmm7, %xmm5 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -131,11 +127,10 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: movw $-257, %ax # imm = 0xFEFF ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vextracti128 $1, %ymm3, %xmm3 -; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm12 = 
xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; CHECK-NEXT: vcvtph2ps %xmm12, %xmm12 +; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm2 +; CHECK-NEXT: vcvtph2ps %xmm2, %xmm7 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm12, %xmm11 +; CHECK-NEXT: vucomiss %xmm7, %xmm6 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -147,9 +142,9 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-513, %ax # imm = 0xFDFF ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufb %xmm1, %xmm3, %xmm1 -; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm2 +; CHECK-NEXT: vprold $16, %xmm2, %xmm6 +; CHECK-NEXT: vcvtph2ps %xmm6, %xmm6 +; CHECK-NEXT: vucomiss %xmm6, %xmm1 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -161,7 +156,7 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-1025, %ax # imm = 0xFBFF ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm1 +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 ; CHECK-NEXT: vucomiss %xmm1, %xmm0 ; CHECK-NEXT: setnp %al @@ -175,9 +170,9 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-2049, %ax # imm = 0xF7FF ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpsrlq $48, %xmm3, %xmm1 +; CHECK-NEXT: vprolq $16, %xmm2, %xmm1 ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm5 +; CHECK-NEXT: vucomiss %xmm1, %xmm3 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -189,7 +184,7 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-4097, %ax # imm = 0xEFFF ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufb %xmm6, %xmm3, %xmm1 +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 ; CHECK-NEXT: vucomiss %xmm1, %xmm0 
; CHECK-NEXT: setnp %al @@ -203,9 +198,9 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: movw $-8193, %ax # imm = 0xDFFF ; CHECK-NEXT: kmovd %eax, %k1 ; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: vpshufb %xmm7, %xmm3, %xmm1 +; CHECK-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-NEXT: vucomiss %xmm1, %xmm8 +; CHECK-NEXT: vucomiss %xmm1, %xmm4 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl @@ -216,7 +211,7 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: movw $-16385, %ax # imm = 0xBFFF ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb %xmm9, %xmm3, %xmm1 +; CHECK-NEXT: vshufps {{.*#+}} xmm1 = xmm2[3,3,3,3] ; CHECK-NEXT: vcvtph2ps %xmm1, %xmm1 ; CHECK-NEXT: kandw %k1, %k0, %k0 ; CHECK-NEXT: vucomiss %xmm1, %xmm0 @@ -228,10 +223,10 @@ define void @main.41() local_unnamed_addr #1 { ; CHECK-NEXT: kshiftlw $14, %k1, %k1 ; CHECK-NEXT: korw %k1, %k0, %k0 ; CHECK-NEXT: kshiftlw $1, %k0, %k0 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpsrldq {{.*#+}} xmm0 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 ; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: vucomiss %xmm0, %xmm10 +; CHECK-NEXT: vucomiss %xmm0, %xmm5 ; CHECK-NEXT: setnp %al ; CHECK-NEXT: sete %cl ; CHECK-NEXT: testb %al, %cl diff --git a/llvm/test/CodeGen/X86/prefer-fpext-splat.ll b/llvm/test/CodeGen/X86/prefer-fpext-splat.ll index 1d8b8b3f9a96ec..c3d7b2e15d0170 100644 --- a/llvm/test/CodeGen/X86/prefer-fpext-splat.ll +++ b/llvm/test/CodeGen/X86/prefer-fpext-splat.ll @@ -176,8 +176,6 @@ define <2 x double> @prefer_f16_v2f64(ptr %p) nounwind { ; AVX512F-LABEL: prefer_f16_v2f64: ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm0 -; 
AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512F-NEXT: vcvtps2pd %xmm0, %xmm0 ; AVX512F-NEXT: retq diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll index fdb2f41ec0e498..d644ed87c3c108 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll @@ -267,11 +267,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 +; SSE41-NEXT: movl $3, %eax +; SSE41-NEXT: movq %rax, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: pxor %xmm1, %xmm3 ; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: pextrb $8, %xmm0, %edx -; SSE41-NEXT: pextrb $0, %xmm2, %ecx +; SSE41-NEXT: pextrb $0, %xmm3, %ecx ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: # kill: def $dl killed $dl killed $edx ; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx @@ -318,7 +320,9 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: movl $3, %eax +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll index abfad67d801a96..d11a08ec1076d5 100644 --- a/llvm/test/CodeGen/X86/test-shrink-bug.ll +++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll @@ 
-67,7 +67,7 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) { ; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 ; CHECK-X64-NEXT: je .LBB1_3 ; CHECK-X64-NEXT: # %bb.1: -; CHECK-X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-X64-NEXT: pslld $8, %xmm0 ; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax ; CHECK-X64-NEXT: testb $1, %al diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll index a15de8b8e0f6ad..6a36cd2a86d5cd 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll @@ -264,7 +264,6 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind { ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll index 6d99bedd40b91c..cdeca96732dc31 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -25,13 +25,7 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind { define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: t1_all_odd_eq: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = 
xmm2[0,2,2,3] -; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -82,13 +76,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: t1_all_odd_ne: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531] -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -256,7 +244,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; CHECK-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: movabsq $-3074457345618258603, %rax # imm = 0xD555555555555555 +; CHECK-AVX1-NEXT: vmovq %rax, %xmm1 +; CHECK-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero @@ -273,7 +263,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; 
CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: movabsq $-3074457345618258603, %rax # imm = 0xD555555555555555 +; CHECK-AVX2-NEXT: vmovq %rax, %xmm1 +; CHECK-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index 3b82df5d5b74d2..ba21af231985a1 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -21,15 +21,13 @@ define float @cvt_i16_to_f32(i16 %a0) nounwind { ; ; F16C-LABEL: cvt_i16_to_f32: ; F16C: # %bb.0: -; F16C-NEXT: movzwl %di, %eax -; F16C-NEXT: vmovd %eax, %xmm0 +; F16C-NEXT: vmovd %edi, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: cvt_i16_to_f32: ; AVX512: # %bb.0: -; AVX512-NEXT: movzwl %di, %eax -; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vmovd %edi, %xmm0 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = bitcast i16 %a0 to half @@ -1370,16 +1368,14 @@ define double @cvt_i16_to_f64(i16 %a0) nounwind { ; ; F16C-LABEL: cvt_i16_to_f64: ; F16C: # %bb.0: -; F16C-NEXT: movzwl %di, %eax -; F16C-NEXT: vmovd %eax, %xmm0 +; F16C-NEXT: vmovd %edi, %xmm0 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: cvt_i16_to_f64: ; AVX512: # %bb.0: -; AVX512-NEXT: movzwl %di, %eax -; AVX512-NEXT: vmovd %eax, %xmm0 +; AVX512-NEXT: vmovd %edi, %xmm0 ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -1410,14 +1406,12 @@ define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind { ; ; F16C-LABEL: cvt_2i16_to_2f64: ; F16C: # %bb.0: -; F16C-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvtps2pd %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: cvt_2i16_to_2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -1503,14 +1497,12 @@ define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind { ; ; F16C-LABEL: cvt_8i16_to_2f64: ; F16C: # %bb.0: -; F16C-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvtps2pd %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: cvt_8i16_to_2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -1877,16 +1869,14 @@ define <2 x double> @load_cvt_2i16_to_2f64(ptr %a0) nounwind { ; ; F16C-LABEL: load_cvt_2i16_to_2f64: ; F16C: # %bb.0: -; F16C-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; F16C-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvtps2pd %xmm0, %xmm0 ; F16C-NEXT: retq ; ; AVX512-LABEL: load_cvt_2i16_to_2f64: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -4976,9 +4966,9 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ; ; F16C-LABEL: fptosi_2f16_to_4i32: ; F16C: # %bb.0: -; F16C-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; F16C-NEXT: vpsrld $16, %xmm0, %xmm1 ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; F16C-NEXT: vcvttps2dq %xmm0, %xmm0 @@ -4987,9 +4977,9 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ; ; AVX512-LABEL: fptosi_2f16_to_4i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index 6e8eefc607ee11..24113441a4e25a 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -413,10 +413,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3 ; AVX512F-NEXT: xorl %eax, %eax ; AVX512F-NEXT: vucomiss %xmm3, %xmm2 ; AVX512F-NEXT: movl $255, %ecx @@ -430,10 +428,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512VL-LABEL: test_v2f16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $16, %xmm0, 
%xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2 +; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3 ; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomiss %xmm3, %xmm2 ; AVX512VL-NEXT: movl $255, %ecx diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 804ca183ad4c9d..edefb16d40e6ed 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -412,10 +412,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vcvtph2ps %xmm3, %xmm3 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3 ; AVX512F-NEXT: xorl %eax, %eax ; AVX512F-NEXT: vucomiss %xmm3, %xmm2 ; AVX512F-NEXT: movl $255, %ecx @@ -429,10 +427,8 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512VL-LABEL: test_v2f16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2 +; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3 ; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomiss %xmm3, %xmm2 ; AVX512VL-NEXT: movl $255, %ecx diff --git 
a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll index 4799b8e7e5857b..3b25a6e033f2fd 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll @@ -210,7 +210,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX2-LABEL: test_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll index 75eeec456c9ac3..2d68cf9d6374d7 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll @@ -211,7 +211,7 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX2-LABEL: test_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4 ; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll index ce3dc8cc873cc7..cc4eb0c8f7343b 100644 --- a/llvm/test/CodeGen/X86/vselect.ll +++ b/llvm/test/CodeGen/X86/vselect.ll @@ -741,14 +741,24 @@ define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) { ; SSE-NEXT: shll $15, %eax ; SSE-NEXT: retq ; -; AVX-LABEL: vselect_any_extend_vector_inreg_crash: -; AVX: # %bb.0: -; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: andl 
$1, %eax -; AVX-NEXT: shll $15, %eax -; AVX-NEXT: retq +; AVX1-LABEL: vselect_any_extend_vector_inreg_crash: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: shll $15, %eax +; AVX1-NEXT: retq +; +; AVX2-LABEL: vselect_any_extend_vector_inreg_crash: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [49,49,49,49] +; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: shll $15, %eax +; AVX2-NEXT: retq 0: %1 = load <8 x i8>, ptr %x %2 = icmp eq <8 x i8> %1, diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s index bfca71ae3a01ef..f447263c30223d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s @@ -2052,3 +2052,15 @@ global_atomic_cond_sub_u32 v0, v2, s[0:1] offset:64 global_atomic_ordered_add_b64 v0, v[2:3], s[0:1] offset:64 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_subrev_u32 v1, v2 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_subrev_rtn_u32 v5, v1, v2 +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_subrev_u64 v1, v[2:3] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU + +ds_subrev_rtn_u64 v[5:6], v1, v[2:3] +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_ds_alias.s b/llvm/test/MC/AMDGPU/gfx12_asm_ds_alias.s index aa063c8800aa41..057e99330bcaef 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_ds_alias.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_ds_alias.s @@ -27,5 +27,11 @@ ds_min_rtn_f64 v[5:6], v1, v[2:3] ds_subrev_u32 v1, v2 // GFX12: ds_rsub_u32 v1, v2 ; encoding: 
[0x00,0x00,0x08,0xd8,0x01,0x02,0x00,0x00] +ds_subrev_rtn_u32 v5, v1, v2 +// GFX12: ds_rsub_rtn_u32 v5, v1, v2 ; encoding: [0x00,0x00,0x88,0xd8,0x01,0x02,0x00,0x05] + ds_subrev_u64 v1, v[2:3] // GFX12: ds_rsub_u64 v1, v[2:3] ; encoding: [0x00,0x00,0x08,0xd9,0x01,0x02,0x00,0x00] + +ds_subrev_rtn_u64 v[5:6], v1, v[2:3] +// GFX12: ds_rsub_rtn_u64 v[5:6], v1, v[2:3] ; encoding: [0x00,0x00,0x88,0xd9,0x01,0x02,0x00,0x05] diff --git a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll index 47b2ddafcfc650..dd9310fe34f341 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll @@ -9,6 +9,11 @@ ; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetX:[0-9]*]])) ; ^ No deref at the end, as this variable ("x") is an array; ; its value is its address. The entire array is in the frame. +; CHECK: call void @llvm.dbg.assign(metadata ptr %[[frame]] +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetX]]) +;; FIXME: Should we be updating the addresses on assigns here as well? 
+; CHECK-SAME: , metadata ptr %[[frame]], metadata !DIExpression()) + ; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame]] ; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetSpill:[0-9]*]], DW_OP_deref)) ; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame]] @@ -78,6 +83,7 @@ init.ready: ; preds = %init.suspend, %coro %i.init.ready.inc = add nsw i32 0, 1 call void @llvm.dbg.value(metadata i32 %i.init.ready.inc, metadata !6, metadata !DIExpression()), !dbg !11 call void @llvm.dbg.value(metadata ptr %x, metadata !12, metadata !DIExpression()), !dbg !17 + call void @llvm.dbg.assign(metadata ptr %x, metadata !12, metadata !DIExpression(), metadata !30, metadata ptr %x, metadata !DIExpression()), !dbg !17 call void @llvm.memset.p0.i64(ptr align 16 %x, i8 0, i64 40, i1 false), !dbg !17 call void @print(i32 %i.init.ready.inc) %ready.again = call zeroext i1 @await_ready() @@ -250,3 +256,4 @@ attributes #4 = { argmemonly nofree nosync nounwind willreturn writeonly } !21 = !DILocation(line: 43, column: 3, scope: !7) !22 = !DILocation(line: 43, column: 8, scope: !7) !23 = !DILocalVariable(name: "produced", scope: !7, file: !1, line:24, type: !10) +!30 = distinct !DIAssignID() \ No newline at end of file diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-node.ll new file mode 100644 index 00000000000000..1940e1bc8d18ac --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-node.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-linux-gnu < %s | FileCheck %s + +define void @test(ptr noalias %arg, ptr noalias %arg1, ptr %arg2) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr noalias [[ARG:%.*]], ptr noalias [[ARG1:%.*]], ptr [[ARG2:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP_I_I:%.*]] = getelementptr i8, ptr [[ARG1]], i64 24 +; CHECK-NEXT: [[TMP_I_I4:%.*]] = 
getelementptr i8, ptr [[ARG]], i64 24 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[TMP_I_I]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: store float [[TMP1]], ptr [[ARG2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <4 x float> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP3]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP_I_I4]], align 8 +; CHECK-NEXT: ret void +; +bb: + %tmp.i.i = getelementptr i8, ptr %arg1, i64 24 + %tmp1.i.i = load float, ptr %tmp.i.i, align 8 + %tmp.i.i2 = getelementptr i8, ptr %arg1, i64 32 + %tmp1.i.i3 = load float, ptr %tmp.i.i2, align 8 + %tmp1.i.i.i = fcmp olt float %tmp1.i.i3, 0.000000e+00 + %tmp9 = select i1 %tmp1.i.i.i, float %tmp1.i.i3, float %tmp1.i.i + %tmp.i.i4 = getelementptr i8, ptr %arg, i64 24 + store float %tmp9, ptr %tmp.i.i4, align 8 + %tmp1.i.i.i10 = fcmp olt float %tmp1.i.i, 0.000000e+00 + %tmp13 = select i1 %tmp1.i.i.i10, float %tmp1.i.i3, float %tmp1.i.i + %tmp.i.i12 = getelementptr i8, ptr %arg, i64 28 + store float %tmp13, ptr %tmp.i.i12, align 4 + %tmp.i.i13 = getelementptr i8, ptr %arg1, i64 28 + %tmp1.i.i14 = load float, ptr %tmp.i.i13, align 4 + %tmp.i.i15 = getelementptr i8, ptr %arg1, i64 36 + %tmp1.i.i16 = load float, ptr %tmp.i.i15, align 4 + %tmp1.i.i.i18 = fcmp olt float %tmp1.i.i16, 0.000000e+00 + %tmp17 = select i1 %tmp1.i.i.i18, float %tmp1.i.i16, float %tmp1.i.i14 + %tmp.i.i20 = getelementptr i8, ptr %arg, i64 32 + store float %tmp17, ptr %tmp.i.i20, align 8 + store float %tmp1.i.i14, ptr %arg2, align 4 + %tmp1.i.i.i24 = fcmp olt float %tmp1.i.i14, 0.000000e+00 + %tmp20 = 
select i1 %tmp1.i.i.i24, float %tmp1.i.i16, float %tmp1.i.i14 + %tmp.i.i26 = getelementptr i8, ptr %arg, i64 36 + store float %tmp20, ptr %tmp.i.i26, align 4 + ret void +} diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index e48dd5bb392082..1691f7733acea5 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -22,21 +22,21 @@ define i32 @test1(<4 x i32> %x, <4 x i32> %y) { ; ; DEBUG-LABEL: @test1( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2, !dbg [[DBG24:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] +; DEBUG-NEXT: tail call void 
@llvm.dbg.value(metadata ptr undef, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3, !dbg [[DBG26:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_2_28_VEC_EXTRACT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_28_VEC_EXTRACT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 0, !dbg [[DBG28:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_2_16_VEC_EXTRACT]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_16_VEC_EXTRACT]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_2_28_VEC_EXTRACT]], !dbg [[DBG29:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_2_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG30:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META20:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG30]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG31:![0-9]+]] ; entry: @@ -71,23 +71,23 @@ define i32 @test2(<4 x i32> %x, <4 x i32> %y) { ; ; DEBUG-LABEL: @test2( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG46:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG46:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2, !dbg [[DBG48:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3, !dbg [[DBG50:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_2_28_VEC_EXTRACT]], metadata 
[[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_28_VEC_EXTRACT]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_16_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <2 x i32> , !dbg [[DBG52:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52]] ; DEBUG-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], i32 0, !dbg [[DBG53:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP3]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG53]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP3]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG53]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_2_28_VEC_EXTRACT]], !dbg [[DBG54:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG54]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG54]] ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[TMP4]], !dbg [[DBG55:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +; DEBUG-NEXT: tail call void 
@llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG56:![0-9]+]] ; entry: @@ -123,22 +123,22 @@ define i32 @test3(<4 x i32> %x, <4 x i32> %y) { ; ; DEBUG-LABEL: @test3( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 -1, i32 2, !dbg [[DBG72:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2, !dbg [[DBG73:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] ; 
DEBUG-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> zeroinitializer, i32 3, !dbg [[DBG75:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> zeroinitializer, i32 0, !dbg [[DBG77:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]], !dbg [[DBG78:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG79:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG80:![0-9]+]] ; entry: @@ -179,26 +179,26 
@@ define i32 @test4(<4 x i32> %x, <4 x i32> %y, ptr %z) { ; ; DEBUG-LABEL: @test4( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG94:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG94:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_16_COPYLOAD:%.*]] = load <4 x i32>, ptr [[Z:%.*]], align 1, !dbg [[DBG96:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] ; DEBUG-NEXT: [[Z_TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Z]], i64 0, i64 2, !dbg [[DBG98:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[Z_TMP1]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[Z_TMP1]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98]] ; DEBUG-NEXT: [[A_SROA_0_8_COPYLOAD:%.*]] = load i32, ptr [[Z_TMP1]], align 1, !dbg [[DBG99:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 [[A_SROA_0_8_COPYLOAD]], i32 2, !dbg [[DBG99]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2, !dbg [[DBG100:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100]] -; DEBUG-NEXT: call 
void @llvm.dbg.value(metadata ptr undef, metadata [[META88:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META88:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 3, !dbg [[DBG102:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META89:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META89:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 0, !dbg [[DBG104:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]], !dbg [[DBG105:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]] ; 
DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG106:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG107:![0-9]+]] ; entry: @@ -243,26 +243,26 @@ define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, ptr addrspace(1) %z) { ; ; DEBUG-LABEL: @test4_as1( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_16_COPYLOAD:%.*]] = load <4 x i32>, ptr addrspace(1) [[Z:%.*]], align 1, !dbg [[DBG123:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] ; DEBUG-NEXT: [[Z_TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[Z]], i16 0, i16 2, !dbg [[DBG125:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr addrspace(1) [[Z_TMP1]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr addrspace(1) [[Z_TMP1]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG125]] ; DEBUG-NEXT: [[A_SROA_0_8_COPYLOAD:%.*]] = load i32, ptr addrspace(1) [[Z_TMP1]], align 1, !dbg [[DBG126:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 [[A_SROA_0_8_COPYLOAD]], i32 2, !dbg [[DBG126]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2, !dbg [[DBG127:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 3, !dbg [[DBG129:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 0, !dbg [[DBG131:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META118:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG131]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG131]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]], !dbg [[DBG132:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132]] ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG133:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG134:![0-9]+]] ; entry: @@ -305,25 +305,25 @@ define i32 @test5(<4 x i32> %x, <4 x i32> %y, ptr %z) { ; ; DEBUG-LABEL: @test5( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG150:![0-9]+]] ; DEBUG-NEXT: [[Z_TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Z:%.*]], i64 0, i64 2, !dbg [[DBG151:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[Z_TMP1]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[Z_TMP1]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT3:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 2, !dbg [[DBG152:![0-9]+]] ; DEBUG-NEXT: store i32 [[A_SROA_0_8_VEC_EXTRACT3]], ptr [[Z_TMP1]], align 1, !dbg [[DBG152]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 2, !dbg [[DBG153:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_4_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 3, !dbg [[DBG155:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_4_12_VEC_EXTRACT]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_4_12_VEC_EXTRACT]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata 
[[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_4_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 0, !dbg [[DBG157:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_4_0_VEC_EXTRACT]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_4_0_VEC_EXTRACT]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_4_12_VEC_EXTRACT]], !dbg [[DBG158:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]] ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_4_0_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG159:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG160:![0-9]+]] ; entry: @@ -367,17 +367,17 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) { ; ; DEBUG-LABEL: @test6( ; DEBUG-NEXT: [[TMP:%.*]] = alloca { <4 x i64>, <4 x i64> }, align 32, !dbg [[DBG168:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG168]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG168]] ; DEBUG-NEXT: [[P0:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, ptr [[TMP]], i32 0, i32 0, !dbg [[DBG169:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr 
[[P0]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[P0]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]] ; DEBUG-NEXT: store <4 x i64> [[X:%.*]], ptr [[P0]], align 32, !dbg [[DBG170:![0-9]+]] ; DEBUG-NEXT: [[P1:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, ptr [[TMP]], i32 0, i32 1, !dbg [[DBG171:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[P1]], metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[P1]], metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171]] ; DEBUG-NEXT: store <4 x i64> [[Y:%.*]], ptr [[P1]], align 32, !dbg [[DBG172:![0-9]+]] ; DEBUG-NEXT: [[ADDR:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, ptr [[TMP]], i32 0, i32 0, i64 [[N:%.*]], !dbg [[DBG173:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[ADDR]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[ADDR]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173]] ; DEBUG-NEXT: [[RES:%.*]] = load i64, ptr [[ADDR]], align 4, !dbg [[DBG174:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i64 [[RES]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i64 [[RES]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174]] ; DEBUG-NEXT: ret i64 [[RES]], !dbg [[DBG175:![0-9]+]] ; %tmp = alloca { <4 x i64>, <4 x i64> } @@ -401,15 +401,15 @@ define <4 x i32> @test_subvec_store() { ; ; DEBUG-LABEL: @test_subvec_store( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG184:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr 
undef, metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG184:![0-9]+]] ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> undef, !dbg [[DBG185:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_0_VECBLEND]], !dbg [[DBG187:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG188:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG188:![0-9]+]] ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_4_VECBLEND]], !dbg [[DBG189:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG190:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG190:![0-9]+]] ; DEBUG-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[A_8_VECBLEND]], i32 3, i32 3, !dbg [[DBG191:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[A_12_VEC_INSERT]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[A_12_VEC_INSERT]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]] ; DEBUG-NEXT: ret <4 x i32> [[A_12_VEC_INSERT]], !dbg [[DBG193:![0-9]+]] ; entry: @@ -443,19 +443,19 @@ define <4 x i32> @test_subvec_load() { ; ; DEBUG-LABEL: @test_subvec_load( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata 
ptr undef, metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] ; DEBUG-NEXT: [[A_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> , !dbg [[DBG205:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[A_0_VEC_EXTRACT]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG206:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_0_VEC_EXTRACT]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG206:![0-9]+]] ; DEBUG-NEXT: [[A_4_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> , !dbg [[DBG207:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[A_4_VEC_EXTRACT]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_4_VEC_EXTRACT]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208:![0-9]+]] ; DEBUG-NEXT: [[A_8_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> , !dbg [[DBG209:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[A_8_VEC_EXTRACT]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209]] +; DEBUG-NEXT: tail call void 
@llvm.dbg.value(metadata <2 x i32> [[A_8_VEC_EXTRACT]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209]] ; DEBUG-NEXT: [[TMP:%.*]] = shufflevector <2 x i32> [[A_0_VEC_EXTRACT]], <2 x i32> [[A_4_VEC_EXTRACT]], <2 x i32> , !dbg [[DBG210:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[TMP]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[TMP]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210]] ; DEBUG-NEXT: [[RET:%.*]] = shufflevector <2 x i32> [[TMP]], <2 x i32> [[A_8_VEC_EXTRACT]], <4 x i32> , !dbg [[DBG211:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[RET]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[RET]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] ; DEBUG-NEXT: ret <4 x i32> [[RET]], !dbg [[DBG212:![0-9]+]] ; entry: @@ -488,15 +488,15 @@ define <4 x float> @test_subvec_memset() { ; ; DEBUG-LABEL: @test_subvec_memset( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META215:![0-9]+]], metadata !DIExpression()), !dbg [[DBG220:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META215:![0-9]+]], metadata !DIExpression()), !dbg [[DBG220:![0-9]+]] ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> undef, !dbg [[DBG221:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META216:![0-9]+]], metadata !DIExpression()), !dbg [[DBG222:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META216:![0-9]+]], metadata !DIExpression()), !dbg [[DBG222:![0-9]+]] ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_0_VECBLEND]], !dbg [[DBG223:![0-9]+]] -; DEBUG-NEXT: 
call void @llvm.dbg.value(metadata ptr undef, metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_4_VECBLEND]], !dbg [[DBG225:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META218:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META218:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]] ; DEBUG-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float 0x38E0E0E0E0000000, i32 3, !dbg [[DBG227:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x float> [[A_12_VEC_INSERT]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[A_12_VEC_INSERT]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]] ; DEBUG-NEXT: ret <4 x float> [[A_12_VEC_INSERT]], !dbg [[DBG229:![0-9]+]] ; entry: @@ -538,24 +538,24 @@ define <4 x float> @test_subvec_memcpy(ptr %x, ptr %y, ptr %z, ptr %f, ptr %out) ; ; DEBUG-LABEL: @test_subvec_memcpy( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] ; DEBUG-NEXT: [[A_0_COPYLOAD:%.*]] = load <2 x float>, ptr [[X:%.*]], align 1, !dbg [[DBG238:![0-9]+]] ; DEBUG-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG238]] ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> 
[[A_0_VEC_EXPAND]], <4 x float> undef, !dbg [[DBG238]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] ; DEBUG-NEXT: [[A_4_COPYLOAD:%.*]] = load <2 x float>, ptr [[Y:%.*]], align 1, !dbg [[DBG240:![0-9]+]] ; DEBUG-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG240]] ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_4_VEC_EXPAND]], <4 x float> [[A_0_VECBLEND]], !dbg [[DBG240]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] ; DEBUG-NEXT: [[A_8_COPYLOAD:%.*]] = load <2 x float>, ptr [[Z:%.*]], align 1, !dbg [[DBG242:![0-9]+]] ; DEBUG-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG242]] ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_8_VEC_EXPAND]], <4 x float> [[A_4_VECBLEND]], !dbg [[DBG242]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] ; DEBUG-NEXT: [[A_12_COPYLOAD:%.*]] = load float, ptr [[F:%.*]], align 1, !dbg [[DBG244:![0-9]+]] ; DEBUG-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float [[A_12_COPYLOAD]], i32 3, !dbg [[DBG244]] ; DEBUG-NEXT: [[A_8_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[A_12_VEC_INSERT]], <4 x float> poison, <2 x 
i32> , !dbg [[DBG245:![0-9]+]] ; DEBUG-NEXT: store <2 x float> [[A_8_VEC_EXTRACT]], ptr [[OUT:%.*]], align 1, !dbg [[DBG245]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x float> [[A_12_VEC_INSERT]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[A_12_VEC_INSERT]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] ; DEBUG-NEXT: ret <4 x float> [[A_12_VEC_INSERT]], !dbg [[DBG247:![0-9]+]] ; entry: @@ -596,7 +596,7 @@ define i32 @PR14212(<3 x i8> %val) { ; ; DEBUG-LABEL: @PR14212( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <3 x i8> [[VAL:%.*]] to i24, !dbg [[DBG253:![0-9]+]] ; DEBUG-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i8 undef to i32, !dbg [[DBG254:![0-9]+]] ; DEBUG-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[RETVAL_SROA_2_0_INSERT_EXT]], 24, !dbg [[DBG254]] @@ -605,7 +605,7 @@ define i32 @PR14212(<3 x i8> %val) { ; DEBUG-NEXT: [[RETVAL_0_INSERT_EXT:%.*]] = zext i24 [[TMP0]] to i32, !dbg [[DBG254]] ; DEBUG-NEXT: [[RETVAL_0_INSERT_MASK:%.*]] = and i32 [[RETVAL_SROA_2_0_INSERT_INSERT]], -16777216, !dbg [[DBG254]] ; DEBUG-NEXT: [[RETVAL_0_INSERT_INSERT:%.*]] = or i32 [[RETVAL_0_INSERT_MASK]], [[RETVAL_0_INSERT_EXT]], !dbg [[DBG254]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[RETVAL_0_INSERT_INSERT]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[RETVAL_0_INSERT_INSERT]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253]] ; DEBUG-NEXT: ret i32 [[RETVAL_0_INSERT_INSERT]], !dbg [[DBG254]] ; 
entry: @@ -630,12 +630,12 @@ define <2 x i8> @PR14349.1(i32 %x) { ; ; DEBUG-LABEL: @PR14349.1( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG260:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG260:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG261:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast i16 [[A_SROA_0_0_EXTRACT_TRUNC]] to <2 x i8>, !dbg [[DBG261]] ; DEBUG-NEXT: [[A_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[X]], 16, !dbg [[DBG261]] ; DEBUG-NEXT: [[A_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_2_0_EXTRACT_SHIFT]] to i16, !dbg [[DBG261]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i8> [[TMP0]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i8> [[TMP0]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262:![0-9]+]] ; DEBUG-NEXT: ret <2 x i8> [[TMP0]], !dbg [[DBG263:![0-9]+]] ; entry: @@ -666,7 +666,7 @@ define i32 @PR14349.2(<2 x i8> %x) { ; ; DEBUG-LABEL: @PR14349.2( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i8> [[X:%.*]] to i16, !dbg [[DBG269:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i16 undef to i32, !dbg [[DBG270:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_2_0_INSERT_EXT]], 16, !dbg [[DBG270]] @@ -675,7 +675,7 @@ define i32 @PR14349.2(<2 x i8> %x) { ; DEBUG-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP0]] to i32, !dbg 
[[DBG270]] ; DEBUG-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i32 [[A_SROA_2_0_INSERT_INSERT]], -65536, !dbg [[DBG270]] ; DEBUG-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]], !dbg [[DBG270]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_0_INSERT_INSERT]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_0_INSERT_INSERT]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269]] ; DEBUG-NEXT: ret i32 [[A_SROA_0_0_INSERT_INSERT]], !dbg [[DBG270]] ; entry: @@ -702,21 +702,21 @@ define i32 @test7(<2 x i32> %x, <2 x i32> %y) { ; ; DEBUG-LABEL: @test7( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG284:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG284:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 1, !dbg [[DBG286:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_4_VEC_EXTRACT]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG286]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META277:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG287:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_4_VEC_EXTRACT]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG286]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG287:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_12_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[Y:%.*]], i32 1, !dbg [[DBG288:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_2_12_VEC_EXTRACT]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG288]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_12_VEC_EXTRACT]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG288]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_8_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[Y]], i32 0, !dbg [[DBG290:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_2_8_VEC_EXTRACT]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_8_VEC_EXTRACT]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_4_VEC_EXTRACT]], [[A_SROA_2_12_VEC_EXTRACT]], !dbg [[DBG291:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291]] ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_2_8_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG292:![0-9]+]] -; 
DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292]] ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG293:![0-9]+]] ; entry: @@ -751,14 +751,14 @@ define i32 @test8(<2 x i32> %x) { ; ; DEBUG-LABEL: @test8( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0, !dbg [[DBG302:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_0_VEC_EXTRACT]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG302]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_0_VEC_EXTRACT]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG302]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X]], i32 1, !dbg [[DBG304:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[A_SROA_0_4_VEC_EXTRACT]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_4_VEC_EXTRACT]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_0_VEC_EXTRACT]], [[A_SROA_0_4_VEC_EXTRACT]], !dbg [[DBG305:![0-9]+]] -; 
DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG305]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG305]] ; DEBUG-NEXT: ret i32 [[TMP4]], !dbg [[DBG306:![0-9]+]] ; entry: @@ -786,11 +786,11 @@ define <2 x i32> @test9(i32 %x, i32 %y) { ; ; DEBUG-LABEL: @test9( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0, !dbg [[DBG313:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG314:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG314:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[A_SROA_0_0_VEC_INSERT]], i32 [[Y:%.*]], i32 1, !dbg [[DBG315:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_0_4_VEC_INSERT]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_0_4_VEC_INSERT]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]] ; DEBUG-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]], !dbg [[DBG317:![0-9]+]] ; entry: @@ -817,11 +817,11 @@ define <2 x i32> @test10(<4 x i16> %x, i32 %y) { ; ; DEBUG-LABEL: @test10( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META320:![0-9]+]], metadata !DIExpression()), !dbg [[DBG323:![0-9]+]] +; DEBUG-NEXT: 
tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META320:![0-9]+]], metadata !DIExpression()), !dbg [[DBG323:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32>, !dbg [[DBG324:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1, !dbg [[DBG326:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_0_4_VEC_INSERT]], metadata [[META322:![0-9]+]], metadata !DIExpression()), !dbg [[DBG327:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_0_4_VEC_INSERT]], metadata [[META322:![0-9]+]], metadata !DIExpression()), !dbg [[DBG327:![0-9]+]] ; DEBUG-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]], !dbg [[DBG328:![0-9]+]] ; entry: @@ -850,12 +850,12 @@ define <2 x float> @test11(<4 x i16> %x, i32 %y) { ; ; DEBUG-LABEL: @test11( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META331:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META331:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32>, !dbg [[DBG335:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG336:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG336:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1, !dbg [[DBG337:![0-9]+]] ; 
DEBUG-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[A_SROA_0_4_VEC_INSERT]] to <2 x float>, !dbg [[DBG338:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x float> [[TMP1]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x float> [[TMP1]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338]] ; DEBUG-NEXT: ret <2 x float> [[TMP1]], !dbg [[DBG339:![0-9]+]] ; entry: @@ -876,9 +876,9 @@ define <4 x float> @test12(<4 x i32> %val) { ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; ; DEBUG-LABEL: @test12( -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META342:![0-9]+]], metadata !DIExpression()), !dbg [[DBG344:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META342:![0-9]+]], metadata !DIExpression()), !dbg [[DBG344:![0-9]+]] ; DEBUG-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VAL:%.*]] to <4 x float>, !dbg [[DBG345:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x float> [[TMP1]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG345]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[TMP1]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG345]] ; DEBUG-NEXT: ret <4 x float> [[TMP1]], !dbg [[DBG346:![0-9]+]] ; %a = alloca <3 x i32>, align 16 @@ -904,16 +904,16 @@ define <2 x i64> @test13(i32 %a, i32 %b, i32 %c, i32 %d) { ; ; DEBUG-LABEL: @test13( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG354:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG354:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0, !dbg [[DBG355:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata 
[[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_0_VEC_INSERT]], i32 [[B:%.*]], i32 1, !dbg [[DBG357:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META351:![0-9]+]], metadata !DIExpression()), !dbg [[DBG358:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META351:![0-9]+]], metadata !DIExpression()), !dbg [[DBG358:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_4_VEC_INSERT]], i32 [[C:%.*]], i32 2, !dbg [[DBG359:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META352:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META352:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_8_VEC_INSERT]], i32 [[D:%.*]], i32 3, !dbg [[DBG361:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[X_SROA_0_12_VEC_INSERT]] to <2 x i64>, !dbg [[DBG362:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i64> [[TMP0]], metadata [[META353:![0-9]+]], metadata !DIExpression()), !dbg [[DBG362]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i64> [[TMP0]], metadata [[META353:![0-9]+]], metadata !DIExpression()), !dbg [[DBG362]] ; DEBUG-NEXT: ret <2 x i64> [[TMP0]], !dbg [[DBG363:![0-9]+]] ; entry: @@ -946,26 +946,26 @@ define i32 @test14(<2 x i64> %x) { ; ; DEBUG-LABEL: @test14( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META366:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] +; DEBUG-NEXT: tail call void 
@llvm.dbg.value(metadata ptr undef, metadata [[META366:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>, !dbg [[DBG379:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META367:![0-9]+]], metadata !DIExpression()), !dbg [[DBG380:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META367:![0-9]+]], metadata !DIExpression()), !dbg [[DBG380:![0-9]+]] ; DEBUG-NEXT: [[X_ADDR_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0, !dbg [[DBG381:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] ; DEBUG-NEXT: [[X_ADDR_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1, !dbg [[DBG383:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_4_VEC_EXTRACT]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG383]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG384:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_4_VEC_EXTRACT]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG383]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG384:![0-9]+]] ; DEBUG-NEXT: 
[[X_ADDR_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2, !dbg [[DBG385:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META373:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META373:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] ; DEBUG-NEXT: [[X_ADDR_SROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3, !dbg [[DBG387:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_12_VEC_EXTRACT]], metadata [[META374:![0-9]+]], metadata !DIExpression()), !dbg [[DBG387]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_12_VEC_EXTRACT]], metadata [[META374:![0-9]+]], metadata !DIExpression()), !dbg [[DBG387]] ; DEBUG-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], [[X_ADDR_SROA_0_4_VEC_EXTRACT]], !dbg [[DBG388:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD]], metadata [[META375:![0-9]+]], metadata !DIExpression()), !dbg [[DBG388]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[ADD]], metadata [[META375:![0-9]+]], metadata !DIExpression()), !dbg [[DBG388]] ; DEBUG-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], [[X_ADDR_SROA_0_12_VEC_EXTRACT]], !dbg [[DBG389:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD1]], metadata [[META376:![0-9]+]], metadata !DIExpression()), !dbg [[DBG389]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[ADD1]], metadata [[META376:![0-9]+]], metadata !DIExpression()), !dbg [[DBG389]] ; DEBUG-NEXT: 
[[ADD2:%.*]] = add i32 [[ADD]], [[ADD1]], !dbg [[DBG390:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD2]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG390]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[ADD2]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG390]] ; DEBUG-NEXT: ret i32 [[ADD2]], !dbg [[DBG391:![0-9]+]] ; entry: @@ -1002,19 +1002,19 @@ define <4 x ptr> @test15(i32 %a, i32 %b, i32 %c, i32 %d) { ; DEBUG-LABEL: @test15( ; DEBUG-NEXT: entry: ; DEBUG-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32, !dbg [[DBG400:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] ; DEBUG-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32, !dbg [[DBG401:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META395:![0-9]+]], metadata !DIExpression()), !dbg [[DBG402:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META395:![0-9]+]], metadata !DIExpression()), !dbg [[DBG402:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG403:![0-9]+]] ; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4, !dbg [[DBG403]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META396:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META396:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_8_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8, !dbg [[DBG405:![0-9]+]] ; DEBUG-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_X_TMP3_SROA_IDX2]], align 8, !dbg 
[[DBG405]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META397:![0-9]+]], metadata !DIExpression()), !dbg [[DBG406:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META397:![0-9]+]], metadata !DIExpression()), !dbg [[DBG406:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_12_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12, !dbg [[DBG407:![0-9]+]] ; DEBUG-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_X_TMP4_SROA_IDX3]], align 4, !dbg [[DBG407]] ; DEBUG-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32, !dbg [[DBG408:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META398:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META398:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408]] ; DEBUG-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], !dbg [[DBG409:![0-9]+]] ; entry: @@ -1045,19 +1045,19 @@ define <4 x ptr> @test16(i64 %a, i64 %b, i64 %c, i64 %d) { ; ; DEBUG-LABEL: @test16( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG417:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG417:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A:%.*]] to ptr, !dbg [[DBG418:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x ptr> undef, ptr [[TMP0]], i32 0, !dbg [[DBG418]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG419:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG419:![0-9]+]] ; 
DEBUG-NEXT: [[TMP1:%.*]] = inttoptr i64 [[B:%.*]] to ptr, !dbg [[DBG420:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_0_VEC_INSERT]], ptr [[TMP1]], i32 1, !dbg [[DBG420]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421:![0-9]+]] ; DEBUG-NEXT: [[TMP2:%.*]] = inttoptr i64 [[C:%.*]] to ptr, !dbg [[DBG422:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_16_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_8_VEC_INSERT]], ptr [[TMP2]], i32 2, !dbg [[DBG422]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG423:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG423:![0-9]+]] ; DEBUG-NEXT: [[TMP3:%.*]] = inttoptr i64 [[D:%.*]] to ptr, !dbg [[DBG424:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_24_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_16_VEC_INSERT]], ptr [[TMP3]], i32 3, !dbg [[DBG424]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_24_VEC_INSERT]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_24_VEC_INSERT]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425:![0-9]+]] ; DEBUG-NEXT: ret <4 x ptr> [[X_SROA_0_24_VEC_INSERT]], !dbg [[DBG426:![0-9]+]] ; entry: @@ -1090,19 +1090,19 @@ define <4 x ptr> @test17(i32 %a, i32 %b, i64 %c, i64 %d) { ; DEBUG-LABEL: @test17( ; DEBUG-NEXT: entry: ; DEBUG-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32, !dbg [[DBG434:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META429:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG434]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META429:![0-9]+]], metadata !DIExpression()), !dbg [[DBG434]] ; DEBUG-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32, !dbg [[DBG435:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META430:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META430:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG437:![0-9]+]] ; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4, !dbg [[DBG437]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG438:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG438:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_16_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 16, !dbg [[DBG439:![0-9]+]] ; DEBUG-NEXT: store i64 [[C:%.*]], ptr [[X_SROA_0_16_X_TMP3_SROA_IDX2]], align 16, !dbg [[DBG439]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META432:![0-9]+]], metadata !DIExpression()), !dbg [[DBG440:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META432:![0-9]+]], metadata !DIExpression()), !dbg [[DBG440:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_24_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 24, !dbg [[DBG441:![0-9]+]] ; DEBUG-NEXT: store i64 [[D:%.*]], ptr [[X_SROA_0_24_X_TMP4_SROA_IDX3]], align 8, !dbg [[DBG441]] ; DEBUG-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32, !dbg [[DBG442:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x ptr> 
[[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META433:![0-9]+]], metadata !DIExpression()), !dbg [[DBG442]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META433:![0-9]+]], metadata !DIExpression()), !dbg [[DBG442]] ; DEBUG-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], !dbg [[DBG443:![0-9]+]] ; entry: @@ -1129,10 +1129,10 @@ define i1 @test18() { ; ; DEBUG-LABEL: @test18( ; DEBUG-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i64>, align 32, !dbg [[DBG449:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META446:![0-9]+]], metadata !DIExpression()), !dbg [[DBG449]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META446:![0-9]+]], metadata !DIExpression()), !dbg [[DBG449]] ; DEBUG-NEXT: store <2 x i64> , ptr [[A_SROA_0]], align 32, !dbg [[DBG450:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i1, ptr [[A_SROA_0]], align 32, !dbg [[DBG451:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[A_SROA_0_0_A_SROA_0_0_L]], metadata [[META447:![0-9]+]], metadata !DIExpression()), !dbg [[DBG451]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i1 [[A_SROA_0_0_A_SROA_0_0_L]], metadata [[META447:![0-9]+]], metadata !DIExpression()), !dbg [[DBG451]] ; DEBUG-NEXT: ret i1 [[A_SROA_0_0_A_SROA_0_0_L]], !dbg [[DBG452:![0-9]+]] ; %a = alloca <8 x i32> @@ -1149,7 +1149,7 @@ define void @swap-8bytes(ptr %x, ptr %y) { ; CHECK-NEXT: ret void ; ; DEBUG-LABEL: @swap-8bytes( -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META455:![0-9]+]], metadata !DIExpression()), !dbg [[DBG456:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META455:![0-9]+]], metadata !DIExpression()), !dbg [[DBG456:![0-9]+]] ; DEBUG-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[X:%.*]], align 1, !dbg [[DBG457:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], 
i64 8, i1 false), !dbg [[DBG458:![0-9]+]] ; DEBUG-NEXT: store i64 [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1, !dbg [[DBG459:![0-9]+]] @@ -1172,7 +1172,7 @@ define void @swap-7bytes(ptr %x, ptr %y) { ; ; DEBUG-LABEL: @swap-7bytes( ; DEBUG-NEXT: [[TMP:%.*]] = alloca [7 x i8], align 1, !dbg [[DBG464:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META463:![0-9]+]], metadata !DIExpression()), !dbg [[DBG464]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META463:![0-9]+]], metadata !DIExpression()), !dbg [[DBG464]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 7, i1 false), !dbg [[DBG465:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 7, i1 false), !dbg [[DBG466:![0-9]+]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 7, i1 false), !dbg [[DBG467:![0-9]+]] @@ -1195,7 +1195,7 @@ define void @swap-16bytes(ptr %x, ptr %y) { ; ; DEBUG-LABEL: @swap-16bytes( ; DEBUG-NEXT: [[TMP:%.*]] = alloca [2 x i64], align 8, !dbg [[DBG472:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META471:![0-9]+]], metadata !DIExpression()), !dbg [[DBG472]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META471:![0-9]+]], metadata !DIExpression()), !dbg [[DBG472]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 16, i1 false), !dbg [[DBG473:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 16, i1 false), !dbg [[DBG474:![0-9]+]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 16, i1 false), !dbg [[DBG475:![0-9]+]] @@ -1218,7 +1218,7 @@ define void @swap-15bytes(ptr %x, ptr %y) { ; ; DEBUG-LABEL: @swap-15bytes( ; DEBUG-NEXT: [[TMP:%.*]] = alloca [15 x i8], align 1, !dbg [[DBG480:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[TMP]], metadata 
[[META479:![0-9]+]], metadata !DIExpression()), !dbg [[DBG480]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META479:![0-9]+]], metadata !DIExpression()), !dbg [[DBG480]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 15, i1 false), !dbg [[DBG481:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 15, i1 false), !dbg [[DBG482:![0-9]+]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 15, i1 false), !dbg [[DBG483:![0-9]+]] @@ -1245,17 +1245,17 @@ define <4 x i32> @ptrLoadStoreTys(ptr %init, i32 %val2) { ; ; DEBUG-LABEL: @ptrLoadStoreTys( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG492:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META487:![0-9]+]], metadata !DIExpression()), !dbg [[DBG492]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META488:![0-9]+]], metadata !DIExpression()), !dbg [[DBG493:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META487:![0-9]+]], metadata !DIExpression()), !dbg [[DBG492]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META488:![0-9]+]], metadata !DIExpression()), !dbg [[DBG493:![0-9]+]] ; DEBUG-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VAL0]] to i64, !dbg [[DBG494:![0-9]+]] ; DEBUG-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>, !dbg [[DBG494]] ; DEBUG-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> , !dbg [[DBG494]] ; DEBUG-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer, !dbg [[DBG494]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META489:![0-9]+]], metadata !DIExpression()), !dbg [[DBG495:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META489:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG495:![0-9]+]] ; DEBUG-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2, !dbg [[DBG496:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META490:![0-9]+]], metadata !DIExpression()), !dbg [[DBG497:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META490:![0-9]+]], metadata !DIExpression()), !dbg [[DBG497:![0-9]+]] ; DEBUG-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3, !dbg [[DBG498:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META491:![0-9]+]], metadata !DIExpression()), !dbg [[DBG499:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META491:![0-9]+]], metadata !DIExpression()), !dbg [[DBG499:![0-9]+]] ; DEBUG-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]], !dbg [[DBG500:![0-9]+]] ; %val0 = load ptr, ptr %init, align 8 @@ -1285,19 +1285,19 @@ define <4 x float> @ptrLoadStoreTysFloat(ptr %init, float %val2) { ; ; DEBUG-LABEL: @ptrLoadStoreTysFloat( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG508:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META503:![0-9]+]], metadata !DIExpression()), !dbg [[DBG508]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META503:![0-9]+]], metadata !DIExpression()), !dbg [[DBG508]] ; DEBUG-NEXT: [[OBJ:%.*]] = alloca <4 x float>, align 16, !dbg [[DBG509:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META504:![0-9]+]], metadata !DIExpression()), !dbg [[DBG509]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META504:![0-9]+]], metadata !DIExpression()), !dbg [[DBG509]] ; DEBUG-NEXT: store <4 x float> zeroinitializer, ptr [[OBJ]], align 16, !dbg [[DBG510:![0-9]+]] ; 
DEBUG-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16, !dbg [[DBG511:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META505:![0-9]+]], metadata !DIExpression()), !dbg [[DBG512:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META505:![0-9]+]], metadata !DIExpression()), !dbg [[DBG512:![0-9]+]] ; DEBUG-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG513:![0-9]+]] ; DEBUG-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8, !dbg [[DBG513]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META506:![0-9]+]], metadata !DIExpression()), !dbg [[DBG514:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META506:![0-9]+]], metadata !DIExpression()), !dbg [[DBG514:![0-9]+]] ; DEBUG-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG515:![0-9]+]] ; DEBUG-NEXT: store float 1.310720e+05, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4, !dbg [[DBG515]] ; DEBUG-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x float>, ptr [[OBJ]], align 16, !dbg [[DBG516:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x float> [[OBJ_0_SROAVAL]], metadata [[META507:![0-9]+]], metadata !DIExpression()), !dbg [[DBG516]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[OBJ_0_SROAVAL]], metadata [[META507:![0-9]+]], metadata !DIExpression()), !dbg [[DBG516]] ; DEBUG-NEXT: ret <4 x float> [[OBJ_0_SROAVAL]], !dbg [[DBG517:![0-9]+]] ; %val0 = load ptr, ptr %init, align 8 @@ -1325,17 +1325,17 @@ define <4 x i32> @ptrLoadStoreTysAS3(ptr %init, i32 %val2) { ; ; DEBUG-LABEL: @ptrLoadStoreTysAS3( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr addrspace(3), ptr [[INIT:%.*]], align 8, !dbg [[DBG525:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr addrspace(3) [[VAL0]], metadata [[META520:![0-9]+]], metadata !DIExpression()), !dbg [[DBG525]] -; DEBUG-NEXT: 
call void @llvm.dbg.value(metadata ptr undef, metadata [[META521:![0-9]+]], metadata !DIExpression()), !dbg [[DBG526:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr addrspace(3) [[VAL0]], metadata [[META520:![0-9]+]], metadata !DIExpression()), !dbg [[DBG525]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META521:![0-9]+]], metadata !DIExpression()), !dbg [[DBG526:![0-9]+]] ; DEBUG-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[VAL0]] to i64, !dbg [[DBG527:![0-9]+]] ; DEBUG-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>, !dbg [[DBG527]] ; DEBUG-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> , !dbg [[DBG527]] ; DEBUG-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer, !dbg [[DBG527]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META522:![0-9]+]], metadata !DIExpression()), !dbg [[DBG528:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META522:![0-9]+]], metadata !DIExpression()), !dbg [[DBG528:![0-9]+]] ; DEBUG-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2, !dbg [[DBG529:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META523:![0-9]+]], metadata !DIExpression()), !dbg [[DBG530:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META523:![0-9]+]], metadata !DIExpression()), !dbg [[DBG530:![0-9]+]] ; DEBUG-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3, !dbg [[DBG531:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META524:![0-9]+]], metadata !DIExpression()), !dbg [[DBG532:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META524:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG532:![0-9]+]] ; DEBUG-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]], !dbg [[DBG533:![0-9]+]] ; %val0 = load ptr addrspace(3), ptr %init, align 8 @@ -1365,19 +1365,19 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) { ; ; DEBUG-LABEL: @ptrLoadStoreTysPtr( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG541:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META536:![0-9]+]], metadata !DIExpression()), !dbg [[DBG541]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META536:![0-9]+]], metadata !DIExpression()), !dbg [[DBG541]] ; DEBUG-NEXT: [[OBJ:%.*]] = alloca <4 x ptr>, align 16, !dbg [[DBG542:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META537:![0-9]+]], metadata !DIExpression()), !dbg [[DBG542]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META537:![0-9]+]], metadata !DIExpression()), !dbg [[DBG542]] ; DEBUG-NEXT: store <4 x ptr> zeroinitializer, ptr [[OBJ]], align 16, !dbg [[DBG543:![0-9]+]] ; DEBUG-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16, !dbg [[DBG544:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META538:![0-9]+]], metadata !DIExpression()), !dbg [[DBG545:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META538:![0-9]+]], metadata !DIExpression()), !dbg [[DBG545:![0-9]+]] ; DEBUG-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG546:![0-9]+]] ; DEBUG-NEXT: store i64 [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8, !dbg [[DBG546]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META539:![0-9]+]], metadata !DIExpression()), !dbg [[DBG547:![0-9]+]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META539:![0-9]+]], metadata !DIExpression()), !dbg [[DBG547:![0-9]+]] ; DEBUG-NEXT: 
[[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG548:![0-9]+]] ; DEBUG-NEXT: store i64 131072, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4, !dbg [[DBG548]] ; DEBUG-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x ptr>, ptr [[OBJ]], align 16, !dbg [[DBG549:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x ptr> [[OBJ_0_SROAVAL]], metadata [[META540:![0-9]+]], metadata !DIExpression()), !dbg [[DBG549]] +; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[OBJ_0_SROAVAL]], metadata [[META540:![0-9]+]], metadata !DIExpression()), !dbg [[DBG549]] ; DEBUG-NEXT: ret <4 x ptr> [[OBJ_0_SROAVAL]], !dbg [[DBG550:![0-9]+]] ; %val0 = load ptr, ptr %init, align 8 diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx940.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx940.s deleted file mode 100644 index c66d0b44b5e9a6..00000000000000 --- a/llvm/test/tools/llvm-mca/AMDGPU/gfx940.s +++ /dev/null @@ -1,189 +0,0 @@ -# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s - -# CHECK: Iterations: 1 -# CHECK: Instructions: 71 -# CHECK: Total Cycles: 562 -# CHECK: Total uOps: 77 - -# CHECK: Resources: -# CHECK: [0] - HWBranch -# CHECK: [1] - HWExport -# CHECK: [2] - HWLGKM -# CHECK: [3] - HWSALU -# CHECK: [4] - HWVALU -# CHECK: [5] - HWVMEM -# CHECK: [6] - HWXDL - -v_pk_fma_f32 v[0:1], v[0:1], v[0:1], v[0:1] -v_pk_mov_b32 v[0:1], v[2:3], v[4:5] -v_pk_add_f32 v[0:1], v[0:1], v[0:1] -v_pk_mul_f32 v[0:1], v[0:1], v[0:1] -v_add_co_u32 v5, s[0:1], v1, v2 -v_sub_co_u32 v5, s[0:1], v1, v2 -v_subrev_co_u32 v5, s[0:1], v1, v2 -v_addc_co_u32 v5, s[0:1], v1, v2, s[2:3] -v_subb_co_u32 v5, s[0:1], v1, v2, s[2:3] -v_subbrev_co_u32 v5, s[0:1], v1, v2, s[2:3] -v_add_u32 v5, v1, v2 -v_sub_u32 v5, v1, v2 -v_subrev_u32 v5, v1, v2 - -v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] -v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] - -v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] -v_mfma_f32_32x32x2_f32 v[0:15], v0, 
v1, v[18:33] - -v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] -v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] - -v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] -v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] - -v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] -v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] - -v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] -v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] - -v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] -v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] - -v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] -v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] - -v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] -v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] - -v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] -v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] - -v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] -v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] - -v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] -v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] - -v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] -v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] - -v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] -v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] - -v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] -v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] - -v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] -v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] - -v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] -v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] - -v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] -v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] - -v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] -v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] - -v_mfma_f32_32x32x1_2b_f32 
v[0:31], v0, v1, v[34:65] blgp:7 -v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 - -v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] -v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] - -v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] -v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] - -v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] -v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] - -v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] -v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] - -v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 -v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 - -v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 -v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 - -v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 -v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 - -v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 -v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 - -v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 -v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 - -# CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: - - - - 1.00 - - v_pk_fma_f32 v[0:1], v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - - 1.00 - - v_pk_mov_b32 v[0:1], v[2:3], v[4:5] -# CHECK-NEXT: - - - - 1.00 - - v_pk_add_f32 v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - - 1.00 - - v_pk_mul_f32 v[0:1], v[0:1], v[0:1] -# CHECK-NEXT: - - - 1.00 1.00 - - v_add_co_u32_e64 v5, s[0:1], v1, v2 -# CHECK-NEXT: - - - 1.00 1.00 - - v_sub_co_u32_e64 v5, s[0:1], v1, v2 -# CHECK-NEXT: - - - 1.00 1.00 - - v_subrev_co_u32_e64 v5, s[0:1], v1, v2 -# CHECK-NEXT: - - - 1.00 1.00 - - v_addc_co_u32_e64 v5, s[0:1], v1, v2, s[2:3] -# CHECK-NEXT: - - - 1.00 1.00 - - v_subb_co_u32_e64 v5, s[0:1], v1, v2, s[2:3] -# CHECK-NEXT: - - - 1.00 1.00 - - v_subbrev_co_u32_e64 v5, s[0:1], v1, v2, s[2:3] -# CHECK-NEXT: - - - - 1.00 - - v_add_u32_e32 v5, v1, v2 -# 
CHECK-NEXT: - - - - 1.00 - - v_sub_u32_e32 v5, v1, v2 -# CHECK-NEXT: - - - - 1.00 - - v_subrev_u32_e32 v5, v1, v2 -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] -# CHECK-NEXT: - - - - 1.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] -# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] -# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] -# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] -# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] -# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] -# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], 
v[2:3], a[2:5] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] -# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, 
v1, v[18:33] -# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] -# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] -# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 -# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 -# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 -# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 -# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 -# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 -# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 -# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 -# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 -# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index bb8e76a2eeb8be..e0772684e3a954 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/SourceMgr.h" @@ -728,4 +730,70 @@ 
TEST_F(AArch64SelectionDAGTest, ReplaceAllUsesWith) { EXPECT_EQ(DAG->getPCSections(New.getNode()), MD); } +TEST_F(AArch64SelectionDAGTest, computeKnownBits_extload_known01) { + SDLoc Loc; + auto Int8VT = EVT::getIntegerVT(Context, 8); + auto Int32VT = EVT::getIntegerVT(Context, 32); + auto Int64VT = EVT::getIntegerVT(Context, 64); + auto Ptr = DAG->getConstant(0, Loc, Int64VT); + auto PtrInfo = + MachinePointerInfo::getFixedStack(DAG->getMachineFunction(), 0); + AAMDNodes AA; + MDBuilder MDHelper(*DAG->getContext()); + MDNode *Range = MDHelper.createRange(APInt(8, 0), APInt(8, 2)); + MachineMemOperand *MMO = DAG->getMachineFunction().getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, 8, Align(8), AA, Range); + + auto ALoad = DAG->getExtLoad(ISD::EXTLOAD, Loc, Int32VT, DAG->getEntryNode(), + Ptr, Int8VT, MMO); + KnownBits Known = DAG->computeKnownBits(ALoad); + EXPECT_EQ(Known.Zero, APInt(32, 0xfe)); + EXPECT_EQ(Known.One, APInt(32, 0)); + + auto ZLoad = DAG->getExtLoad(ISD::ZEXTLOAD, Loc, Int32VT, DAG->getEntryNode(), + Ptr, Int8VT, MMO); + Known = DAG->computeKnownBits(ZLoad); + EXPECT_EQ(Known.Zero, APInt(32, 0xfffffffe)); + EXPECT_EQ(Known.One, APInt(32, 0)); + + auto SLoad = DAG->getExtLoad(ISD::SEXTLOAD, Loc, Int32VT, DAG->getEntryNode(), + Ptr, Int8VT, MMO); + Known = DAG->computeKnownBits(SLoad); + EXPECT_EQ(Known.Zero, APInt(32, 0xfffffffe)); + EXPECT_EQ(Known.One, APInt(32, 0)); +} + +TEST_F(AArch64SelectionDAGTest, computeKnownBits_extload_knownnegative) { + SDLoc Loc; + auto Int8VT = EVT::getIntegerVT(Context, 8); + auto Int32VT = EVT::getIntegerVT(Context, 32); + auto Int64VT = EVT::getIntegerVT(Context, 64); + auto Ptr = DAG->getConstant(0, Loc, Int64VT); + auto PtrInfo = + MachinePointerInfo::getFixedStack(DAG->getMachineFunction(), 0); + AAMDNodes AA; + MDBuilder MDHelper(*DAG->getContext()); + MDNode *Range = MDHelper.createRange(APInt(8, 0xf0), APInt(8, 0xff)); + MachineMemOperand *MMO = DAG->getMachineFunction().getMachineMemOperand( + 
PtrInfo, MachineMemOperand::MOLoad, 8, Align(8), AA, Range); + + auto ALoad = DAG->getExtLoad(ISD::EXTLOAD, Loc, Int32VT, DAG->getEntryNode(), + Ptr, Int8VT, MMO); + KnownBits Known = DAG->computeKnownBits(ALoad); + EXPECT_EQ(Known.Zero, APInt(32, 0)); + EXPECT_EQ(Known.One, APInt(32, 0xf0)); + + auto ZLoad = DAG->getExtLoad(ISD::ZEXTLOAD, Loc, Int32VT, DAG->getEntryNode(), + Ptr, Int8VT, MMO); + Known = DAG->computeKnownBits(ZLoad); + EXPECT_EQ(Known.Zero, APInt(32, 0xffffff00)); + EXPECT_EQ(Known.One, APInt(32, 0x000000f0)); + + auto SLoad = DAG->getExtLoad(ISD::SEXTLOAD, Loc, Int32VT, DAG->getEntryNode(), + Ptr, Int8VT, MMO); + Known = DAG->computeKnownBits(SLoad); + EXPECT_EQ(Known.Zero, APInt(32, 0)); + EXPECT_EQ(Known.One, APInt(32, 0xfffffff0)); +} + } // end namespace llvm diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 31e3b9dfab4bfd..b2cd71e6a38568 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -339,5 +339,33 @@ TEST(VerifierTest, SwitchInst) { EXPECT_TRUE(verifyFunction(*F)); } +TEST(VerifierTest, CrossFunctionRef) { + LLVMContext C; + Module M("M", C); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false); + Function *F1 = Function::Create(FTy, Function::ExternalLinkage, "foo1", M); + Function *F2 = Function::Create(FTy, Function::ExternalLinkage, "foo2", M); + BasicBlock *Entry1 = BasicBlock::Create(C, "entry", F1); + BasicBlock *Entry2 = BasicBlock::Create(C, "entry", F2); + Type *I32 = Type::getInt32Ty(C); + + Value *Alloca = new AllocaInst(I32, 0, "alloca", Entry1); + ReturnInst::Create(C, Entry1); + + Instruction *Store = new StoreInst(ConstantInt::get(I32, 0), Alloca, Entry2); + ReturnInst::Create(C, Entry2); + + std::string Error; + raw_string_ostream ErrorOS(Error); + EXPECT_TRUE(verifyModule(M, &ErrorOS)); + EXPECT_TRUE( + StringRef(ErrorOS.str()) + .starts_with("Referring to an instruction in another function!")); + + // Explicitly 
erase the store to avoid a use-after-free when the module is + // destroyed. + Store->eraseFromParent(); +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index e89fc687451cd7..297100441113a6 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -439,7 +439,7 @@ INSTANTIATE_TEST_SUITE_P( ARM::AEK_HWDIVARM | ARM::AEK_MP | ARM::AEK_SEC | ARM::AEK_VIRT | ARM::AEK_DSP | ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_RAS | ARM::AEK_I8MM | - ARM::AEK_SB, + ARM::AEK_FP16FML | ARM::AEK_SB, "9-A"), ARMCPUTestParams("neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | @@ -1575,8 +1575,9 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_SB, AArch64::AEK_SVE2, AArch64::AEK_SVE2BITPERM, AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_JSCVT, - AArch64::AEK_FCMA, AArch64::AEK_PAUTH})), - "8.5-A"), + AArch64::AEK_FCMA, AArch64::AEK_PAUTH, + AArch64::AEK_FP16FML})), + "9-A"), ARMCPUTestParams( "ampere1", "armv8.6-a", "crypto-neon-fp-armv8", (AArch64::ExtensionBitset( diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index f26815c2f184fa..980c9bdb6367f7 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -91,6 +91,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::isVoid: return "MVT::isVoid"; case MVT::v1i1: return "MVT::v1i1"; case MVT::v2i1: return "MVT::v2i1"; + case MVT::v3i1: return "MVT::v3i1"; case MVT::v4i1: return "MVT::v4i1"; case MVT::v8i1: return "MVT::v8i1"; case MVT::v16i1: return "MVT::v16i1"; @@ -107,6 +108,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v128i4: return "MVT::v128i4"; case MVT::v1i8: return "MVT::v1i8"; case MVT::v2i8: return "MVT::v2i8"; + case MVT::v3i8: return "MVT::v3i8"; case MVT::v4i8: return 
"MVT::v4i8"; case MVT::v8i8: return "MVT::v8i8"; case MVT::v16i8: return "MVT::v16i8"; diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index d47df597d53a35..fc958f5328736c 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -11,11 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "CodeGenTarget.h" #include "SequenceToOffsetTable.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/CodeGenTypes/MachineValueType.h" #include "llvm/Support/DXILABI.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" @@ -30,28 +33,15 @@ struct DXILShaderModel { int Minor = 0; }; -struct DXILParameter { - int Pos; // position in parameter list - ParameterKind Kind; - StringRef Name; // short, unique name - StringRef Doc; // the documentation description of this parameter - bool IsConst; // whether this argument requires a constant value in the IR - StringRef EnumName; // the name of the enum type if applicable - int MaxValue; // the maximum value for this parameter if applicable - DXILParameter(const Record *R); -}; - struct DXILOperationDesc { - StringRef OpName; // name of DXIL operation + std::string OpName; // name of DXIL operation int OpCode; // ID of DXIL operation StringRef OpClass; // name of the opcode class - StringRef Category; // classification for this instruction StringRef Doc; // the documentation description of this instruction - - SmallVector Params; // the operands that this instruction takes - SmallVector OverloadTypes; // overload types if applicable - StringRef Attr; // operation attribute; reference to string representation - // of llvm::Attribute::AttrKind + SmallVector OpTypes; // Vector of operand types - + // return type is at index 0 + SmallVector + OpAttributes; // 
operation attribute represented as strings StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which // means no map exists bool IsDeriv = false; // whether this is some kind of derivative @@ -74,81 +64,99 @@ struct DXILOperationDesc { }; } // end anonymous namespace -/*! - Convert DXIL type name string to dxil::ParameterKind - - @param typeNameStr Type name string - @return ParameterKind As defined in llvm/Support/DXILABI.h -*/ -static ParameterKind lookupParameterKind(StringRef typeNameStr) { - auto paramKind = StringSwitch(typeNameStr) - .Case("llvm_void_ty", ParameterKind::VOID) - .Case("llvm_half_ty", ParameterKind::HALF) - .Case("llvm_float_ty", ParameterKind::FLOAT) - .Case("llvm_double_ty", ParameterKind::DOUBLE) - .Case("llvm_i1_ty", ParameterKind::I1) - .Case("llvm_i8_ty", ParameterKind::I8) - .Case("llvm_i16_ty", ParameterKind::I16) - .Case("llvm_i32_ty", ParameterKind::I32) - .Case("llvm_i64_ty", ParameterKind::I64) - .Case("llvm_anyfloat_ty", ParameterKind::OVERLOAD) - .Case("llvm_anyint_ty", ParameterKind::OVERLOAD) - .Case("dxil_handle_ty", ParameterKind::DXIL_HANDLE) - .Case("dxil_cbuffer_ty", ParameterKind::CBUFFER_RET) - .Case("dxil_resource_ty", ParameterKind::RESOURCE_RET) - .Default(ParameterKind::INVALID); - assert(paramKind != ParameterKind::INVALID && - "Unsupported DXIL Type specified"); - return paramKind; +/// Convert DXIL type name string to dxil::ParameterKind +/// +/// \param VT Simple Value Type +/// \return ParameterKind As defined in llvm/Support/DXILABI.h + +static ParameterKind getParameterKind(MVT::SimpleValueType VT) { + switch (VT) { + case MVT::isVoid: + return ParameterKind::VOID; + case MVT::f16: + return ParameterKind::HALF; + case MVT::f32: + return ParameterKind::FLOAT; + case MVT::f64: + return ParameterKind::DOUBLE; + case MVT::i1: + return ParameterKind::I1; + case MVT::i8: + return ParameterKind::I8; + case MVT::i16: + return ParameterKind::I16; + case MVT::i32: + return ParameterKind::I32; + case 
MVT::fAny: + case MVT::iAny: + return ParameterKind::OVERLOAD; + default: + llvm_unreachable("Support for specified DXIL Type not yet implemented"); + } } +/// Construct an object using the DXIL Operation records specified +/// in DXIL.td. This serves as the single source of reference of +/// the information extracted from the specified Record R, for +/// C++ code generated by this TableGen backend. +// \param R Object representing TableGen record of a DXIL Operation DXILOperationDesc::DXILOperationDesc(const Record *R) { - OpName = R->getValueAsString("OpName"); + OpName = R->getNameInitAsString(); OpCode = R->getValueAsInt("OpCode"); - OpClass = R->getValueAsDef("OpClass")->getValueAsString("Name"); - Category = R->getValueAsDef("OpCategory")->getValueAsString("Name"); - if (R->getValue("llvm_intrinsic")) { - auto *IntrinsicDef = R->getValueAsDef("llvm_intrinsic"); + Doc = R->getValueAsString("Doc"); + + if (R->getValue("LLVMIntrinsic")) { + auto *IntrinsicDef = R->getValueAsDef("LLVMIntrinsic"); auto DefName = IntrinsicDef->getName(); assert(DefName.starts_with("int_") && "invalid intrinsic name"); // Remove the int_ from intrinsic name. Intrinsic = DefName.substr(4); + // TODO: It is expected that return type and parameter types of + // DXIL Operation are the same as that of the intrinsic. Deviations + // are expected to be encoded in TableGen record specification and + // handled accordingly here. Support to be added later, as needed. + // Get parameter type list of the intrinsic. Types attribute contains + // the list of as [returnType, param1Type,, param2Type, ...] + + OverloadParamIndex = -1; + auto TypeRecs = IntrinsicDef->getValueAsListOfDefs("Types"); + unsigned TypeRecsSize = TypeRecs.size(); + // Populate return type and parameter type names + for (unsigned i = 0; i < TypeRecsSize; i++) { + auto TR = TypeRecs[i]; + OpTypes.emplace_back(getValueType(TR->getValueAsDef("VT"))); + // Get the overload parameter index. + // TODO : Seems hacky. 
Is it possible that more than one parameter can + // be of overload kind?? + // TODO: Check for any additional constraints specified for DXIL operation + // restricting return type. + if (i > 0) { + auto &CurParam = OpTypes.back(); + if (getParameterKind(CurParam) >= ParameterKind::OVERLOAD) { + OverloadParamIndex = i; + } + } + } + // Get the operation class + OpClass = R->getValueAsDef("OpClass")->getName(); + + // NOTE: For now, assume that attributes of DXIL Operation are the same as + // that of the intrinsic. Deviations are expected to be encoded in TableGen + // record specification and handled accordingly here. Support to be added + // later. + auto IntrPropList = IntrinsicDef->getValueAsListInit("IntrProperties"); + auto IntrPropListSize = IntrPropList->size(); + for (unsigned i = 0; i < IntrPropListSize; i++) { + OpAttributes.emplace_back(IntrPropList->getElement(i)->getAsString()); + } } - - Doc = R->getValueAsString("Doc"); - - ListInit *ParamList = R->getValueAsListInit("Params"); - OverloadParamIndex = -1; - for (unsigned I = 0; I < ParamList->size(); ++I) { - Record *Param = ParamList->getElementAsRecord(I); - Params.emplace_back(DXILParameter(Param)); - auto &CurParam = Params.back(); - if (CurParam.Kind >= ParameterKind::OVERLOAD) - OverloadParamIndex = I; - } - ListInit *OverloadTypeList = R->getValueAsListInit("OverloadTypes"); - - for (unsigned I = 0; I < OverloadTypeList->size(); ++I) { - Record *R = OverloadTypeList->getElementAsRecord(I); - OverloadTypes.emplace_back(lookupParameterKind(R->getNameInitAsString())); - } - Attr = StringRef(R->getValue("Attribute")->getNameInitAsString()); } -DXILParameter::DXILParameter(const Record *R) { - Name = R->getValueAsString("Name"); - Pos = R->getValueAsInt("Pos"); - Kind = - lookupParameterKind(R->getValue("ParamType")->getValue()->getAsString()); - if (R->getValue("Doc")) - Doc = R->getValueAsString("Doc"); - IsConst = R->getValueAsBit("IsConstant"); - EnumName = R->getValueAsString("EnumName"); - 
MaxValue = R->getValueAsInt("MaxValue"); -} - -static std::string parameterKindToString(ParameterKind Kind) { +/// Return a string representation of ParameterKind enum +/// \param Kind Parameter Kind enum value +/// \return std::string string representation of input Kind +static std::string getParameterKindStr(ParameterKind Kind) { switch (Kind) { case ParameterKind::INVALID: return "INVALID"; @@ -182,92 +190,77 @@ static std::string parameterKindToString(ParameterKind Kind) { llvm_unreachable("Unknown llvm::dxil::ParameterKind enum"); } -static void emitDXILOpEnum(DXILOperationDesc &Op, raw_ostream &OS) { - // Name = ID, // Doc - OS << Op.OpName << " = " << Op.OpCode << ", // " << Op.Doc << "\n"; -} +/// Return a string representation of OverloadKind enum that maps to +/// input Simple Value Type enum +/// \param VT Simple Value Type enum +/// \return std::string string representation of OverloadKind -static std::string buildCategoryStr(StringSet<> &Cetegorys) { - std::string Str; - raw_string_ostream OS(Str); - for (auto &It : Cetegorys) { - OS << " " << It.getKey(); +static std::string getOverloadKindStr(MVT::SimpleValueType VT) { + switch (VT) { + case MVT::isVoid: + return "OverloadKind::VOID"; + case MVT::f16: + return "OverloadKind::HALF"; + case MVT::f32: + return "OverloadKind::FLOAT"; + case MVT::f64: + return "OverloadKind::DOUBLE"; + case MVT::i1: + return "OverloadKind::I1"; + case MVT::i8: + return "OverloadKind::I8"; + case MVT::i16: + return "OverloadKind::I16"; + case MVT::i32: + return "OverloadKind::I32"; + case MVT::i64: + return "OverloadKind::I64"; + case MVT::iAny: + return "OverloadKind::I16 | OverloadKind::I32 | OverloadKind::I64"; + case MVT::fAny: + return "OverloadKind::HALF | OverloadKind::FLOAT | OverloadKind::DOUBLE"; + default: + llvm_unreachable( + "Support for specified parameter OverloadKind not yet implemented"); } - return OS.str(); } -// Emit enum declaration for DXIL. 
+/// Emit Enums of DXIL Ops +/// \param A vector of DXIL Ops +/// \param Output stream static void emitDXILEnums(std::vector &Ops, raw_ostream &OS) { - // Sort by Category + OpName. + // Sort by OpCode llvm::sort(Ops, [](DXILOperationDesc &A, DXILOperationDesc &B) { - // Group by Category first. - if (A.Category == B.Category) - // Inside same Category, order by OpName. - return A.OpName < B.OpName; - else - return A.Category < B.Category; + return A.OpCode < B.OpCode; }); OS << "// Enumeration for operations specified by DXIL\n"; OS << "enum class OpCode : unsigned {\n"; - StringMap> ClassMap; - StringRef PrevCategory = ""; for (auto &Op : Ops) { - StringRef Category = Op.Category; - if (Category != PrevCategory) { - OS << "\n// " << Category << "\n"; - PrevCategory = Category; - } - emitDXILOpEnum(Op, OS); - auto It = ClassMap.find(Op.OpClass); - if (It != ClassMap.end()) { - It->second.insert(Op.Category); - } else { - ClassMap[Op.OpClass].insert(Op.Category); - } + // Name = ID, // Doc + OS << Op.OpName << " = " << Op.OpCode << ", // " << Op.Doc << "\n"; } OS << "\n};\n\n"; - std::vector> ClassVec; - for (auto &It : ClassMap) { - ClassVec.emplace_back( - std::pair(It.getKey().str(), buildCategoryStr(It.second))); - } - // Sort by Category + ClassName. - llvm::sort(ClassVec, [](std::pair &A, - std::pair &B) { - StringRef ClassA = A.first; - StringRef CategoryA = A.second; - StringRef ClassB = B.first; - StringRef CategoryB = B.second; - // Group by Category first. - if (CategoryA == CategoryB) - // Inside same Category, order by ClassName. 
- return ClassA < ClassB; - else - return CategoryA < CategoryB; - }); - OS << "// Groups for DXIL operations with equivalent function templates\n"; OS << "enum class OpCodeClass : unsigned {\n"; - PrevCategory = ""; - for (auto &It : ClassVec) { - - StringRef Category = It.second; - if (Category != PrevCategory) { - OS << "\n// " << Category << "\n"; - PrevCategory = Category; - } - StringRef Name = It.first; - OS << Name << ",\n"; + // Build an OpClass set to print + SmallSet OpClassSet; + for (auto &Op : Ops) { + OpClassSet.insert(Op.OpClass); + } + for (auto &C : OpClassSet) { + OS << C << ",\n"; } OS << "\n};\n\n"; } -// Emit map from llvm intrinsic to DXIL operation. +/// Emit map of DXIL operation to LLVM or DirectX intrinsic +/// \param A vector of DXIL Ops +/// \param Output stream static void emitDXILIntrinsicMap(std::vector &Ops, raw_ostream &OS) { OS << "\n"; @@ -285,75 +278,27 @@ static void emitDXILIntrinsicMap(std::vector &Ops, OS << "\n"; } -/*! - Convert operation attribute string to Attribute enum - - @param Attr string reference - @return std::string Attribute enum string - */ -static std::string emitDXILOperationAttr(StringRef Attr) { - return StringSwitch(Attr) - .Case("ReadNone", "Attribute::ReadNone") - .Case("ReadOnly", "Attribute::ReadOnly") - .Default("Attribute::None"); -} - -static std::string overloadKindStr(ParameterKind Overload) { - switch (Overload) { - case ParameterKind::HALF: - return "OverloadKind::HALF"; - case ParameterKind::FLOAT: - return "OverloadKind::FLOAT"; - case ParameterKind::DOUBLE: - return "OverloadKind::DOUBLE"; - case ParameterKind::I1: - return "OverloadKind::I1"; - case ParameterKind::I8: - return "OverloadKind::I8"; - case ParameterKind::I16: - return "OverloadKind::I16"; - case ParameterKind::I32: - return "OverloadKind::I32"; - case ParameterKind::I64: - return "OverloadKind::I64"; - case ParameterKind::VOID: - return "OverloadKind::VOID"; - default: - return "OverloadKind::UNKNOWN"; - } -} - -static 
std::string -getDXILOperationOverloads(SmallVector Overloads) { - // Format is: OverloadKind::FLOAT | OverloadKind::HALF - auto It = Overloads.begin(); - std::string Result; - raw_string_ostream OS(Result); - OS << overloadKindStr(*It); - for (++It; It != Overloads.end(); ++It) { - OS << " | " << overloadKindStr(*It); +/// Convert operation attribute string to Attribute enum +/// +/// \param Attr string reference +/// \return std::string Attribute enum string + +static std::string emitDXILOperationAttr(SmallVector Attrs) { + for (auto Attr : Attrs) { + // TODO: For now just recognize IntrNoMem and IntrReadMem as valid and + // ignore others. + if (Attr == "IntrNoMem") { + return "Attribute::ReadNone"; + } else if (Attr == "IntrReadMem") { + return "Attribute::ReadOnly"; + } } - return OS.str(); -} - -static std::string lowerFirstLetter(StringRef Name) { - if (Name.empty()) - return ""; - - std::string LowerName = Name.str(); - LowerName[0] = llvm::toLower(Name[0]); - return LowerName; -} - -static std::string getDXILOpClassName(StringRef OpClass) { - // Lower first letter expect for special case. - return StringSwitch(OpClass) - .Case("CBufferLoad", "cbufferLoad") - .Case("CBufferLoadLegacy", "cbufferLoadLegacy") - .Case("GSInstanceID", "gsInstanceID") - .Default(lowerFirstLetter(OpClass)); + return "Attribute::None"; } +/// Emit DXIL operation table +/// \param A vector of DXIL Ops +/// \param Output stream static void emitDXILOperationTable(std::vector &Ops, raw_ostream &OS) { // Sort by OpCode. 
@@ -369,15 +314,16 @@ static void emitDXILOperationTable(std::vector &Ops, StringMap> ParameterMap; StringSet<> ClassSet; for (auto &Op : Ops) { - OpStrings.add(Op.OpName.str()); + OpStrings.add(Op.OpName); if (ClassSet.contains(Op.OpClass)) continue; ClassSet.insert(Op.OpClass); - OpClassStrings.add(getDXILOpClassName(Op.OpClass)); + OpClassStrings.add(Op.OpClass.data()); SmallVector ParamKindVec; - for (auto &Param : Op.Params) { - ParamKindVec.emplace_back(Param.Kind); + // ParamKindVec is a vector of parameters. Skip return type at index 0 + for (unsigned i = 1; i < Op.OpTypes.size(); i++) { + ParamKindVec.emplace_back(getParameterKind(Op.OpTypes[i])); } ParameterMap[Op.OpClass] = ParamKindVec; Parameters.add(ParamKindVec); @@ -389,7 +335,7 @@ static void emitDXILOperationTable(std::vector &Ops, Parameters.layout(); // Emit the DXIL operation table. - //{dxil::OpCode::Sin, OpCodeNameIndex, OpCodeClass::Unary, + //{dxil::OpCode::Sin, OpCodeNameIndex, OpCodeClass::unary, // OpCodeClassNameIndex, // OverloadKind::FLOAT | OverloadKind::HALF, Attribute::AttrKind::ReadNone, 0, // 3, ParameterTableOffset}, @@ -398,12 +344,12 @@ static void emitDXILOperationTable(std::vector &Ops, OS << " static const OpCodeProperty OpCodeProps[] = {\n"; for (auto &Op : Ops) { - OS << " { dxil::OpCode::" << Op.OpName << ", " - << OpStrings.get(Op.OpName.str()) << ", OpCodeClass::" << Op.OpClass - << ", " << OpClassStrings.get(getDXILOpClassName(Op.OpClass)) << ", " - << getDXILOperationOverloads(Op.OverloadTypes) << ", " - << emitDXILOperationAttr(Op.Attr) << ", " << Op.OverloadParamIndex - << ", " << Op.Params.size() << ", " + OS << " { dxil::OpCode::" << Op.OpName << ", " << OpStrings.get(Op.OpName) + << ", OpCodeClass::" << Op.OpClass << ", " + << OpClassStrings.get(Op.OpClass.data()) << ", " + << getOverloadKindStr(Op.OpTypes[0]) << ", " + << emitDXILOperationAttr(Op.OpAttributes) << ", " + << Op.OverloadParamIndex << ", " << Op.OpTypes.size() - 1 << ", " << 
Parameters.get(ParameterMap[Op.OpClass]) << " },\n"; } OS << " };\n"; @@ -418,7 +364,7 @@ static void emitDXILOperationTable(std::vector &Ops, "OpCodeProperty &B) {\n"; OS << " return A.OpCode < B.OpCode;\n"; OS << " });\n"; - OS << " assert(Prop && \"fail to find OpCodeProperty\");\n"; + OS << " assert(Prop && \"failed to find OpCodeProperty\");\n"; OS << " return Prop;\n"; OS << "}\n\n"; @@ -450,7 +396,7 @@ static void emitDXILOperationTable(std::vector &Ops, Parameters.emit( OS, [](raw_ostream &ParamOS, ParameterKind Kind) { - ParamOS << "ParameterKind::" << parameterKindToString(Kind); + ParamOS << "ParameterKind::" << getParameterKindStr(Kind); }, "ParameterKind::INVALID"); OS << " };\n\n"; @@ -459,30 +405,28 @@ static void emitDXILOperationTable(std::vector &Ops, OS << "}\n "; } +/// Entry function call that invokes the functionality of this TableGen backend +/// \param Records TableGen records of DXIL Operations defined in DXIL.td +/// \param OS output stream static void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) { - std::vector Ops = Records.getAllDerivedDefinitions("DXILOperation"); OS << "// Generated code, do not edit.\n"; OS << "\n"; - + // Get all DXIL Ops to intrinsic mapping records + std::vector OpIntrMaps = + Records.getAllDerivedDefinitions("DXILOpMapping"); std::vector DXILOps; - DXILOps.reserve(Ops.size()); - for (auto *Record : Ops) { + for (auto *Record : OpIntrMaps) { DXILOps.emplace_back(DXILOperationDesc(Record)); } - OS << "#ifdef DXIL_OP_ENUM\n"; emitDXILEnums(DXILOps, OS); OS << "#endif\n\n"; - OS << "#ifdef DXIL_OP_INTRINSIC_MAP\n"; emitDXILIntrinsicMap(DXILOps, OS); OS << "#endif\n\n"; - OS << "#ifdef DXIL_OP_OPERATION_TABLE\n"; emitDXILOperationTable(DXILOps, OS); OS << "#endif\n\n"; - - OS << "\n"; } static TableGen::Emitter::Opt X("gen-dxil-operation", EmitDXILOperation, diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index 5bab4ff188e8ed..51f18f360ed311 
100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -215,12 +215,15 @@ int64_t SearchableTableEmitter::getNumericKey(const SearchIndex &Index, Record *Rec) { assert(Index.Fields.size() == 1); + // To be consistent with compareBy and primaryRepresentation elsewhere, + // we check for IsInstruction before Enum-- these fields are not exclusive. + if (Index.Fields[0].IsInstruction) { + Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name); + return Target->getInstrIntValue(TheDef); + } if (Index.Fields[0].Enum) { Record *EnumEntry = Rec->getValueAsDef(Index.Fields[0].Name); return Index.Fields[0].Enum->EntryMap[EnumEntry]->second; - } else if (Index.Fields[0].IsInstruction) { - Record *TheDef = Rec->getValueAsDef(Index.Fields[0].Name); - return Target->getInstrIntValue(TheDef); } return getInt(Rec, Index.Fields[0].Name); diff --git a/mlir/include/mlir/Query/Matcher/ErrorBuilder.h b/mlir/include/mlir/Query/Matcher/ErrorBuilder.h index 1073daed8703f5..08f1f415cbd3e5 100644 --- a/mlir/include/mlir/Query/Matcher/ErrorBuilder.h +++ b/mlir/include/mlir/Query/Matcher/ErrorBuilder.h @@ -37,8 +37,12 @@ enum class ErrorType { None, // Parser Errors + ParserChainedExprInvalidArg, + ParserChainedExprNoCloseParen, + ParserChainedExprNoOpenParen, ParserFailedToBuildMatcher, ParserInvalidToken, + ParserMalformedChainedExpr, ParserNoCloseParen, ParserNoCode, ParserNoComma, @@ -50,9 +54,10 @@ enum class ErrorType { // Registry Errors RegistryMatcherNotFound, + RegistryNotBindable, RegistryValueNotFound, RegistryWrongArgCount, - RegistryWrongArgType + RegistryWrongArgType, }; void addError(Diagnostics *error, SourceRange range, ErrorType errorType, diff --git a/mlir/include/mlir/Query/Matcher/MatchersInternal.h b/mlir/include/mlir/Query/Matcher/MatchersInternal.h index 67455be592393b..117f7d4edef9e3 100644 --- a/mlir/include/mlir/Query/Matcher/MatchersInternal.h +++ b/mlir/include/mlir/Query/Matcher/MatchersInternal.h 
@@ -63,8 +63,15 @@ class DynMatcher { bool match(Operation *op) const { return implementation->match(op); } + void setFunctionName(StringRef name) { functionName = name.str(); }; + + bool hasFunctionName() const { return !functionName.empty(); }; + + StringRef getFunctionName() const { return functionName; }; + private: llvm::IntrusiveRefCntPtr implementation; + std::string functionName; }; } // namespace mlir::query::matcher diff --git a/mlir/lib/Query/Matcher/Diagnostics.cpp b/mlir/lib/Query/Matcher/Diagnostics.cpp index 10468dbcc53067..2a137e8fdfab0d 100644 --- a/mlir/lib/Query/Matcher/Diagnostics.cpp +++ b/mlir/lib/Query/Matcher/Diagnostics.cpp @@ -38,6 +38,8 @@ static llvm::StringRef errorTypeToFormatString(ErrorType type) { return "Incorrect type for arg $0. (Expected = $1) != (Actual = $2)"; case ErrorType::RegistryValueNotFound: return "Value not found: $0"; + case ErrorType::RegistryNotBindable: + return "Matcher does not support binding."; case ErrorType::ParserStringError: return "Error parsing string token: <$0>"; @@ -57,6 +59,14 @@ static llvm::StringRef errorTypeToFormatString(ErrorType type) { return "Unexpected end of code."; case ErrorType::ParserOverloadedType: return "Input value has unresolved overloaded type: $0"; + case ErrorType::ParserMalformedChainedExpr: + return "Period not followed by valid chained call."; + case ErrorType::ParserChainedExprInvalidArg: + return "Missing/Invalid argument for the chained call."; + case ErrorType::ParserChainedExprNoCloseParen: + return "Missing ')' for the chained call."; + case ErrorType::ParserChainedExprNoOpenParen: + return "Missing '(' for the chained call."; case ErrorType::ParserFailedToBuildMatcher: return "Failed to build matcher: $0."; diff --git a/mlir/lib/Query/Matcher/Parser.cpp b/mlir/lib/Query/Matcher/Parser.cpp index 30eb4801fc03c1..3609e24f9939f7 100644 --- a/mlir/lib/Query/Matcher/Parser.cpp +++ b/mlir/lib/Query/Matcher/Parser.cpp @@ -26,12 +26,17 @@ struct Parser::TokenInfo { text = 
newText; } + // Known identifiers. + static const char *const ID_Extract; + llvm::StringRef text; TokenKind kind = TokenKind::Eof; SourceRange range; VariantValue value; }; +const char *const Parser::TokenInfo::ID_Extract = "extract"; + class Parser::CodeTokenizer { public: // Constructor with matcherCode and error @@ -298,6 +303,36 @@ bool Parser::parseIdentifierPrefixImpl(VariantValue *value) { return parseMatcherExpressionImpl(nameToken, openToken, ctor, value); } +bool Parser::parseChainedExpression(std::string &argument) { + // Parse the parenthesized argument to .extract("foo") + // Note: EOF is handled inside the consume functions and would fail below when + // checking token kind. + const TokenInfo openToken = tokenizer->consumeNextToken(); + const TokenInfo argumentToken = tokenizer->consumeNextTokenIgnoreNewlines(); + const TokenInfo closeToken = tokenizer->consumeNextTokenIgnoreNewlines(); + + if (openToken.kind != TokenKind::OpenParen) { + error->addError(openToken.range, ErrorType::ParserChainedExprNoOpenParen); + return false; + } + + if (argumentToken.kind != TokenKind::Literal || + !argumentToken.value.isString()) { + error->addError(argumentToken.range, + ErrorType::ParserChainedExprInvalidArg); + return false; + } + + if (closeToken.kind != TokenKind::CloseParen) { + error->addError(closeToken.range, ErrorType::ParserChainedExprNoCloseParen); + return false; + } + + // If all checks passed, extract the argument and return true. 
+ argument = argumentToken.value.getString(); + return true; +} + // Parse the arguments of a matcher bool Parser::parseMatcherArgs(std::vector &args, MatcherCtor ctor, const TokenInfo &nameToken, TokenInfo &endToken) { @@ -364,13 +399,34 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &nameToken, return false; } + std::string functionName; + if (tokenizer->peekNextToken().kind == TokenKind::Period) { + tokenizer->consumeNextToken(); + TokenInfo chainCallToken = tokenizer->consumeNextToken(); + if (chainCallToken.kind == TokenKind::CodeCompletion) { + addCompletion(chainCallToken, MatcherCompletion("extract(\"", "extract")); + return false; + } + + if (chainCallToken.kind != TokenKind::Ident || + chainCallToken.text != TokenInfo::ID_Extract) { + error->addError(chainCallToken.range, + ErrorType::ParserMalformedChainedExpr); + return false; + } + + if (chainCallToken.text == TokenInfo::ID_Extract && + !parseChainedExpression(functionName)) + return false; + } + if (!ctor) return false; // Merge the start and end infos. 
SourceRange matcherRange = nameToken.range; matcherRange.end = endToken.range.end; - VariantMatcher result = - sema->actOnMatcherExpression(*ctor, matcherRange, args, error); + VariantMatcher result = sema->actOnMatcherExpression( + *ctor, matcherRange, functionName, args, error); if (result.isNull()) return false; *value = result; @@ -470,9 +526,10 @@ Parser::RegistrySema::lookupMatcherCtor(llvm::StringRef matcherName) { } VariantMatcher Parser::RegistrySema::actOnMatcherExpression( - MatcherCtor ctor, SourceRange nameRange, llvm::ArrayRef args, - Diagnostics *error) { - return RegistryManager::constructMatcher(ctor, nameRange, args, error); + MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName, + llvm::ArrayRef args, Diagnostics *error) { + return RegistryManager::constructMatcher(ctor, nameRange, functionName, args, + error); } std::vector Parser::RegistrySema::getAcceptedCompletionTypes( diff --git a/mlir/lib/Query/Matcher/Parser.h b/mlir/lib/Query/Matcher/Parser.h index f049af34e9c907..58968023022d56 100644 --- a/mlir/lib/Query/Matcher/Parser.h +++ b/mlir/lib/Query/Matcher/Parser.h @@ -64,10 +64,9 @@ class Parser { // Process a matcher expression. The caller takes ownership of the Matcher // object returned. - virtual VariantMatcher - actOnMatcherExpression(MatcherCtor ctor, SourceRange nameRange, - llvm::ArrayRef args, - Diagnostics *error) = 0; + virtual VariantMatcher actOnMatcherExpression( + MatcherCtor ctor, SourceRange nameRange, llvm::StringRef functionName, + llvm::ArrayRef args, Diagnostics *error) = 0; // Look up a matcher by name in the matcher name found by the parser. 
virtual std::optional @@ -93,10 +92,11 @@ class Parser { std::optional lookupMatcherCtor(llvm::StringRef matcherName) override; - VariantMatcher actOnMatcherExpression(MatcherCtor ctor, - SourceRange nameRange, - llvm::ArrayRef args, - Diagnostics *error) override; + VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, + SourceRange NameRange, + StringRef functionName, + ArrayRef Args, + Diagnostics *Error) override; std::vector getAcceptedCompletionTypes( llvm::ArrayRef> context) override; @@ -153,6 +153,8 @@ class Parser { Parser(CodeTokenizer *tokenizer, const Registry &matcherRegistry, const NamedValueMap *namedValues, Diagnostics *error); + bool parseChainedExpression(std::string &argument); + bool parseExpressionImpl(VariantValue *value); bool parseMatcherArgs(std::vector &args, MatcherCtor ctor, diff --git a/mlir/lib/Query/Matcher/RegistryManager.cpp b/mlir/lib/Query/Matcher/RegistryManager.cpp index 01856aa8ffa67f..8c9197f4d00981 100644 --- a/mlir/lib/Query/Matcher/RegistryManager.cpp +++ b/mlir/lib/Query/Matcher/RegistryManager.cpp @@ -132,8 +132,19 @@ RegistryManager::getMatcherCompletions(llvm::ArrayRef acceptedTypes, VariantMatcher RegistryManager::constructMatcher( MatcherCtor ctor, internal::SourceRange nameRange, - llvm::ArrayRef args, internal::Diagnostics *error) { - return ctor->create(nameRange, args, error); + llvm::StringRef functionName, llvm::ArrayRef args, + internal::Diagnostics *error) { + VariantMatcher out = ctor->create(nameRange, args, error); + if (functionName.empty() || out.isNull()) + return out; + + if (std::optional result = out.getDynMatcher()) { + result->setFunctionName(functionName); + return VariantMatcher::SingleMatcher(*result); + } + + error->addError(nameRange, internal::ErrorType::RegistryNotBindable); + return {}; } } // namespace mlir::query::matcher diff --git a/mlir/lib/Query/Matcher/RegistryManager.h b/mlir/lib/Query/Matcher/RegistryManager.h index 5f2867261225e7..e2026e97f83dcb 100644 --- 
a/mlir/lib/Query/Matcher/RegistryManager.h +++ b/mlir/lib/Query/Matcher/RegistryManager.h @@ -61,6 +61,7 @@ class RegistryManager { static VariantMatcher constructMatcher(MatcherCtor ctor, internal::SourceRange nameRange, + llvm::StringRef functionName, ArrayRef args, internal::Diagnostics *error); }; diff --git a/mlir/lib/Query/Query.cpp b/mlir/lib/Query/Query.cpp index 5c42e5a5f0a116..27db52b37dade0 100644 --- a/mlir/lib/Query/Query.cpp +++ b/mlir/lib/Query/Query.cpp @@ -8,6 +8,8 @@ #include "mlir/Query/Query.h" #include "QueryParser.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/IRMapping.h" #include "mlir/Query/Matcher/MatchFinder.h" #include "mlir/Query/QuerySession.h" #include "mlir/Support/LogicalResult.h" @@ -34,6 +36,70 @@ static void printMatch(llvm::raw_ostream &os, QuerySession &qs, Operation *op, "\"" + binding + "\" binds here"); } +// TODO: Extract into a helper function that can be reused outside query +// context. +static Operation *extractFunction(std::vector &ops, + MLIRContext *context, + llvm::StringRef functionName) { + context->loadDialect(); + OpBuilder builder(context); + + // Collect data for function creation + std::vector slice; + std::vector values; + std::vector outputTypes; + + for (auto *op : ops) { + // Return op's operands are propagated, but the op itself isn't needed. + if (!isa(op)) + slice.push_back(op); + + // All results are returned by the extracted function. + outputTypes.insert(outputTypes.end(), op->getResults().getTypes().begin(), + op->getResults().getTypes().end()); + + // Track all values that need to be taken as input to function. 
+ values.insert(values.end(), op->getOperands().begin(), + op->getOperands().end()); + } + + // Create the function + FunctionType funcType = + builder.getFunctionType(ValueRange(values), outputTypes); + auto loc = builder.getUnknownLoc(); + func::FuncOp funcOp = func::FuncOp::create(loc, functionName, funcType); + + builder.setInsertionPointToEnd(funcOp.addEntryBlock()); + + // Map original values to function arguments + IRMapping mapper; + for (const auto &arg : llvm::enumerate(values)) + mapper.map(arg.value(), funcOp.getArgument(arg.index())); + + // Clone operations and build function body + std::vector clonedOps; + std::vector clonedVals; + for (Operation *slicedOp : slice) { + Operation *clonedOp = + clonedOps.emplace_back(builder.clone(*slicedOp, mapper)); + clonedVals.insert(clonedVals.end(), clonedOp->result_begin(), + clonedOp->result_end()); + } + // Add return operation + builder.create(loc, clonedVals); + + // Remove unused function arguments + size_t currentIndex = 0; + while (currentIndex < funcOp.getNumArguments()) { + if (funcOp.getArgument(currentIndex).use_empty()) + funcOp.eraseArgument(currentIndex); + else + ++currentIndex; + } + + return funcOp; +} + Query::~Query() = default; mlir::LogicalResult InvalidQuery::run(llvm::raw_ostream &os, @@ -65,9 +131,21 @@ mlir::LogicalResult QuitQuery::run(llvm::raw_ostream &os, mlir::LogicalResult MatchQuery::run(llvm::raw_ostream &os, QuerySession &qs) const { + Operation *rootOp = qs.getRootOp(); int matchCount = 0; std::vector matches = - matcher::MatchFinder().getMatches(qs.getRootOp(), matcher); + matcher::MatchFinder().getMatches(rootOp, matcher); + + // An extract call is recognized by considering if the matcher has a name. + // TODO: Consider making the extract more explicit. 
+ if (matcher.hasFunctionName()) { + auto functionName = matcher.getFunctionName(); + Operation *function = + extractFunction(matches, rootOp->getContext(), functionName); + os << "\n" << *function << "\n\n"; + return mlir::success(); + } + os << "\n"; for (Operation *op : matches) { os << "Match #" << ++matchCount << ":\n\n"; diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir index 228d4e5f6f8a1a..e1f062121b12f9 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -28,7 +28,7 @@ }> module { - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %f0 = arith.constant 0.0 : f64 %d = arith.constant dense<[[ 1.0, 2.0, 3.0, 4.0 ], @@ -39,19 +39,14 @@ module { %s = sparse_tensor.convert %d : tensor<5x4xf64> to tensor<5x4xf64, #CSR_hi> // - // CHECK: ( 0, 4, 4, 8, 8, 9, 9, 13 ) - // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 2, 0, 1, 2, 3, 0, 1, 2, 3 ) - // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 5.5, 9, 10, 11, 12, 13, 14, 15, 16 ) + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 17 + // CHECK-NEXT: pos[1] : ( 0, 4, 4, 8, 8, 9, 9, 13 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 2, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 5.5, 9, 10, 11, 12, 13, 14, 15, 16 + // CHECK-NEXT: ---- // - %pos = 
sparse_tensor.positions %s {level = 1 : index } : tensor<5x4xf64, #CSR_hi> to memref - %vecp = vector.transfer_read %pos[%c0], %c0 : memref, vector<8xindex> - vector.print %vecp : vector<8xindex> - %crd = sparse_tensor.coordinates %s {level = 1 : index } : tensor<5x4xf64, #CSR_hi> to memref - %vecc = vector.transfer_read %crd[%c0], %c0 : memref, vector<17xindex> - vector.print %vecc : vector<17xindex> - %val = sparse_tensor.values %s : tensor<5x4xf64, #CSR_hi> to memref - %vecv = vector.transfer_read %val[%c0], %f0 : memref, vector<17xf64> - vector.print %vecv : vector<17xf64> + sparse_tensor.print %s : tensor<5x4xf64, #CSR_hi> // Release the resources. bufferization.dealloc_tensor %s: tensor<5x4xf64, #CSR_hi> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir index fa0dbac269b926..863e1c62370e32 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -90,7 +90,7 @@ module { // // Main driver. // - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index // Initialize various matrices, dense for stress testing, @@ -140,33 +140,94 @@ module { %b4 = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #DCSR> // - // Sanity check on stored entries before going into the computations. 
- // - // CHECK: 32 - // CHECK-NEXT: 32 - // CHECK-NEXT: 4 - // CHECK-NEXT: 4 - // CHECK-NEXT: 32 - // CHECK-NEXT: 32 - // CHECK-NEXT: 8 - // CHECK-NEXT: 8 - // - %noea1 = sparse_tensor.number_of_entries %a1 : tensor<4x8xf64, #CSR> - %noea2 = sparse_tensor.number_of_entries %a2 : tensor<4x8xf64, #DCSR> - %noea3 = sparse_tensor.number_of_entries %a3 : tensor<4x8xf64, #CSR> - %noea4 = sparse_tensor.number_of_entries %a4 : tensor<4x8xf64, #DCSR> - %noeb1 = sparse_tensor.number_of_entries %b1 : tensor<8x4xf64, #CSR> - %noeb2 = sparse_tensor.number_of_entries %b2 : tensor<8x4xf64, #DCSR> - %noeb3 = sparse_tensor.number_of_entries %b3 : tensor<8x4xf64, #CSR> - %noeb4 = sparse_tensor.number_of_entries %b4 : tensor<8x4xf64, #DCSR> - vector.print %noea1 : index - vector.print %noea2 : index - vector.print %noea3 : index - vector.print %noea4 : index - vector.print %noeb1 : index - vector.print %noeb2 : index - vector.print %noeb3 : index - vector.print %noeb4 : index + // Sanity check before going into the computations. 
+ // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: pos[1] : ( 0, 8, 16, 24, 32 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 + // CHECK-NEXT: values : ( 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3, 1.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4 + // CHECK-NEXT: ---- + // + sparse_tensor.print %a1 : tensor<4x8xf64, #CSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 8, 16, 24, 32 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 + // CHECK-NEXT: values : ( 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3, 1.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4 + // CHECK-NEXT: ---- + // + sparse_tensor.print %a2 : tensor<4x8xf64, #DCSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: pos[1] : ( 0, 2, 2, 3, 4 + // CHECK-NEXT: crd[1] : ( 1, 5, 1, 7 + // CHECK-NEXT: values : ( 2.1, 6.1, 2.3, 1 + // CHECK-NEXT: ---- + // + sparse_tensor.print %a3 : tensor<4x8xf64, #CSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 4 + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 4 + // CHECK-NEXT: crd[1] : ( 1, 5, 1, 7 + // CHECK-NEXT: values : ( 2.1, 6.1, 2.3, 1 + // CHECK-NEXT: ---- + // + sparse_tensor.print %a4 : tensor<4x8xf64, #DCSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12, 16, 20, 24, 28, 32 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 10.1, 11.1, 12.1, 13.1, 10.2, 11.2, 
12.2, 13.2, 10.3, 11.3, 12.3, 13.3, 10.4, 11.4, 12.4, 13.4, 10.5, 11.5, 12.5, 13.5, 10.6, 11.6, 12.6, 13.6, 10.7, 11.7, 12.7, 13.7, 10.8, 11.8, 12.8, 13.8 + // CHECK-NEXT: ---- + // + sparse_tensor.print %b1 : tensor<8x4xf64, #CSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 32 + // CHECK-NEXT: pos[0] : ( 0, 8 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7 + // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12, 16, 20, 24, 28, 32 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 10.1, 11.1, 12.1, 13.1, 10.2, 11.2, 12.2, 13.2, 10.3, 11.3, 12.3, 13.3, 10.4, 11.4, 12.4, 13.4, 10.5, 11.5, 12.5, 13.5, 10.6, 11.6, 12.6, 13.6, 10.7, 11.7, 12.7, 13.7, 10.8, 11.8, 12.8, 13.8 + // CHECK-NEXT: ---- + // + sparse_tensor.print %b2 : tensor<8x4xf64, #DCSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 8 + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 3, 4, 4, 5, 6, 8 + // CHECK-NEXT: crd[1] : ( 3, 2, 1, 0, 1, 2, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8 + // CHECK-NEXT: ---- + // + sparse_tensor.print %b3 : tensor<8x4xf64, #CSR> + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 8 + // CHECK-NEXT: pos[0] : ( 0, 7 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 5, 6, 7 + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 3, 4, 5, 6, 8 + // CHECK-NEXT: crd[1] : ( 3, 2, 1, 0, 1, 2, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8 + // CHECK-NEXT: ---- + // + sparse_tensor.print %b4 : tensor<8x4xf64, #DCSR> // Call kernels with dense. 
%0 = call @matmul1(%da, %db, %zero) @@ -208,24 +269,26 @@ module { call @printMemrefF64(%u0) : (tensor<*xf64>) -> () // - // CHECK: {{\[}}[388.76, 425.56, 462.36, 499.16], - // CHECK-NEXT: [397.12, 434.72, 472.32, 509.92], - // CHECK-NEXT: [405.48, 443.88, 482.28, 520.68], - // CHECK-NEXT: [413.84, 453.04, 492.24, 531.44]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 16 + // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12, 16 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 388.76, 425.56, 462.36, 499.16, 397.12, 434.72, 472.32, 509.92, 405.48, 443.88, 482.28, 520.68, 413.84, 453.04, 492.24, 531.44 + // CHECK-NEXT: ---- // - %c1 = sparse_tensor.convert %1 : tensor<4x4xf64, #CSR> to tensor<4x4xf64> - %c1u = tensor.cast %c1 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c1u) : (tensor<*xf64>) -> () + sparse_tensor.print %1 : tensor<4x4xf64, #CSR> // - // CHECK: {{\[}}[388.76, 425.56, 462.36, 499.16], - // CHECK-NEXT: [397.12, 434.72, 472.32, 509.92], - // CHECK-NEXT: [405.48, 443.88, 482.28, 520.68], - // CHECK-NEXT: [413.84, 453.04, 492.24, 531.44]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 16 + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12, 16 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 388.76, 425.56, 462.36, 499.16, 397.12, 434.72, 472.32, 509.92, 405.48, 443.88, 482.28, 520.68, 413.84, 453.04, 492.24, 531.44 + // CHECK-NEXT: ---- // - %c2 = sparse_tensor.convert %2 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64> - %c2u = tensor.cast %c2 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c2u) : (tensor<*xf64>) -> () + sparse_tensor.print %2 : tensor<4x4xf64, #DCSR> // // CHECK: {{\[}}[86.08, 94.28, 102.48, 110.68], @@ -237,24 +300,26 @@ module { call @printMemrefF64(%u3) : (tensor<*xf64>) -> () // - // CHECK: {{\[}}[86.08, 94.28, 102.48, 110.68], - // 
CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [23.46, 25.76, 28.06, 30.36], - // CHECK-NEXT: [10.8, 11.8, 12.8, 13.8]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 12 + // CHECK-NEXT: pos[1] : ( 0, 4, 4, 8, 12 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 86.08, 94.28, 102.48, 110.68, 23.46, 25.76, 28.06, 30.36, 10.8, 11.8, 12.8, 13.8 + // CHECK-NEXT: ---- // - %c4 = sparse_tensor.convert %4 : tensor<4x4xf64, #CSR> to tensor<4x4xf64> - %c4u = tensor.cast %c4 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c4u) : (tensor<*xf64>) -> () + sparse_tensor.print %4 : tensor<4x4xf64, #CSR> // - // CHECK: {{\[}}[86.08, 94.28, 102.48, 110.68], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [23.46, 25.76, 28.06, 30.36], - // CHECK-NEXT: [10.8, 11.8, 12.8, 13.8]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 12 + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12 + // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 + // CHECK-NEXT: values : ( 86.08, 94.28, 102.48, 110.68, 23.46, 25.76, 28.06, 30.36, 10.8, 11.8, 12.8, 13.8 + // CHECK-NEXT: ---- // - %c5 = sparse_tensor.convert %5 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64> - %c5u = tensor.cast %c5 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c5u) : (tensor<*xf64>) -> () + sparse_tensor.print %5 : tensor<4x4xf64, #DCSR> // // CHECK: {{\[}}[0, 30.5, 4.2, 0], @@ -266,46 +331,26 @@ module { call @printMemrefF64(%u6) : (tensor<*xf64>) -> () // - // CHECK: {{\[}}[0, 30.5, 4.2, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 4.6, 0], - // CHECK-NEXT: [0, 0, 7, 8]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: pos[1] : ( 0, 2, 2, 3, 5 + // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 + // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 + // CHECK-NEXT: ---- // - %c7 = sparse_tensor.convert %7 : tensor<4x4xf64, #CSR> to tensor<4x4xf64> - %c7u = 
tensor.cast %c7 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c7u) : (tensor<*xf64>) -> () + sparse_tensor.print %7 : tensor<4x4xf64, #CSR> // - // CHECK: {{\[}}[0, 30.5, 4.2, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 4.6, 0], - // CHECK-NEXT: [0, 0, 7, 8]] - // - %c8 = sparse_tensor.convert %8 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64> - %c8u = tensor.cast %c8 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c8u) : (tensor<*xf64>) -> () - - // - // Sanity check on nonzeros. - // - // CHECK: [30.5, 4.2, 4.6, 7, 8{{.*}}] - // CHECK: [30.5, 4.2, 4.6, 7, 8{{.*}}] - // - %val7 = sparse_tensor.values %7 : tensor<4x4xf64, #CSR> to memref - %val8 = sparse_tensor.values %8 : tensor<4x4xf64, #DCSR> to memref - call @printMemref1dF64(%val7) : (memref) -> () - call @printMemref1dF64(%val8) : (memref) -> () - - // - // Sanity check on stored entries after the computations. - // - // CHECK-NEXT: 5 - // CHECK-NEXT: 5 + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 5 + // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 + // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 + // CHECK-NEXT: ---- // - %noe7 = sparse_tensor.number_of_entries %7 : tensor<4x4xf64, #CSR> - %noe8 = sparse_tensor.number_of_entries %8 : tensor<4x4xf64, #DCSR> - vector.print %noe7 : index - vector.print %noe8 : index + sparse_tensor.print %8 : tensor<4x4xf64, #DCSR> // Release the resources. 
bufferization.dealloc_tensor %a1 : tensor<4x8xf64, #CSR> @@ -316,12 +361,6 @@ module { bufferization.dealloc_tensor %b2 : tensor<8x4xf64, #DCSR> bufferization.dealloc_tensor %b3 : tensor<8x4xf64, #CSR> bufferization.dealloc_tensor %b4 : tensor<8x4xf64, #DCSR> - bufferization.dealloc_tensor %c1 : tensor<4x4xf64> - bufferization.dealloc_tensor %c2 : tensor<4x4xf64> - bufferization.dealloc_tensor %c4 : tensor<4x4xf64> - bufferization.dealloc_tensor %c5 : tensor<4x4xf64> - bufferization.dealloc_tensor %c7 : tensor<4x4xf64> - bufferization.dealloc_tensor %c8 : tensor<4x4xf64> bufferization.dealloc_tensor %0 : tensor<4x4xf64> bufferization.dealloc_tensor %1 : tensor<4x4xf64, #CSR> bufferization.dealloc_tensor %2 : tensor<4x4xf64, #DCSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir index 96c8a30ade8e42..a7184c044569ca 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: %{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -132,7 +132,7 @@ module { // // Main driver. 
// - func.func @entry() { + func.func @main() { %c_0 = arith.constant 0 : index %c_1 = arith.constant 1 : index %c_2 = arith.constant 2 : index @@ -170,14 +170,16 @@ module { // DCSR test // - // CHECK: [0, 30.5, 4.2, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 4.6, 0], - // CHECK-NEXT: [0, 0, 7, 8] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 5 + // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 + // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 + // CHECK-NEXT: ---- // - %c2 = sparse_tensor.convert %2 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64> - %c2u = tensor.cast %c2 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c2u) : (tensor<*xf64>) -> () + sparse_tensor.print %2 : tensor<4x4xf64, #DCSR> %t1 = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #CSR> %a1 = tensor.extract_slice %t1[0, 0][4, 8][1, 1] : tensor<8x8xf64, #CSR> to tensor<4x8xf64, #CSR_SLICE> @@ -188,63 +190,64 @@ module { // CSR test // - // CHECK: [0, 30.5, 4.2, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 4.6, 0], - // CHECK-NEXT: [0, 0, 7, 8] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 5 + // CHECK-NEXT: pos[1] : ( 0, 2, 2, 3, 5 + // CHECK-NEXT: crd[1] : ( 1, 2, 2, 2, 3 + // CHECK-NEXT: values : ( 30.5, 4.2, 4.6, 7, 8 + // CHECK-NEXT: ---- // - %c3 = sparse_tensor.convert %3 : tensor<4x4xf64, #CSR> to tensor<4x4xf64> - %c3u = tensor.cast %c3 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c3u) : (tensor<*xf64>) -> () + sparse_tensor.print %3 : tensor<4x4xf64, #CSR> + // slice x slice // - // CHECK: [2.3, 0, 0, 0], - // CHECK-NEXT: [6.9, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [12.6, 0, 0, 0]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 3 + // CHECK-NEXT: pos[1] : ( 0, 1, 2, 2, 3 + // CHECK-NEXT: crd[1] : ( 0, 0, 0 + // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 + // CHECK-NEXT: ---- 
// + sparse_tensor.print %4 : tensor<4x4xf64, #CSR> + %s1 = tensor.extract_slice %tmp[0, 1][4, 4][2, 1] : tensor<8x8xf64, #DCSR> to tensor<4x4xf64, #DCSR_SLICE_1> %s2 = tensor.extract_slice %b1[0, 0][4, 4][2, 1] : tensor<8x4xf64, #CSR> to tensor<4x4xf64, #CSR_SLICE_1> %4 = call @matmul1(%s2, %s1) : (tensor<4x4xf64, #CSR_SLICE_1>, tensor<4x4xf64, #DCSR_SLICE_1>) -> tensor<4x4xf64, #CSR> - %c4 = sparse_tensor.convert %4 : tensor<4x4xf64, #CSR> to tensor<4x4xf64> - %c4u = tensor.cast %c4 : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c4u) : (tensor<*xf64>) -> () // slice coo x slice coo // - // CHECK: [2.3, 0, 0, 0], - // CHECK-NEXT: [6.9, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [12.6, 0, 0, 0]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 3 + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 0, 1, 0, 3, 0 + // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 + // CHECK-NEXT: ---- // + sparse_tensor.print %o_coo : tensor<4x4xf64, #COO> %t1_coo = sparse_tensor.convert %sa : tensor<8x8xf64> to tensor<8x8xf64, #COO> %b1_coo = sparse_tensor.convert %sb : tensor<8x4xf64> to tensor<8x4xf64, #COO> %s2_coo = tensor.extract_slice %b1_coo[0, 0][4, 4][2, 1] : tensor<8x4xf64, #COO> to tensor<4x4xf64, #COO_SLICE_1> %s1_coo = tensor.extract_slice %t1_coo[0, 1][4, 4][2, 1] : tensor<8x8xf64, #COO> to tensor<4x4xf64, #COO_SLICE_2> %o_coo = call @matmul5(%s2_coo, %s1_coo) : (tensor<4x4xf64, #COO_SLICE_1>, tensor<4x4xf64, #COO_SLICE_2>) -> tensor<4x4xf64, #COO> - %c4_coo = sparse_tensor.convert %o_coo : tensor<4x4xf64, #COO> to tensor<4x4xf64> - %c4u_coo = tensor.cast %c4_coo : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c4u_coo) : (tensor<*xf64>) -> () - // slice x slice (same as above, but with dynamic stride information) // - // CHECK: [2.3, 0, 0, 0], - // CHECK-NEXT: [6.9, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0], - // CHECK-NEXT: [12.6, 0, 0, 0]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 3 + // CHECK-NEXT: 
pos[1] : ( 0, 1, 2, 2, 3 + // CHECK-NEXT: crd[1] : ( 0, 0, 0 + // CHECK-NEXT: values : ( 2.3, 6.9, 12.6 + // CHECK-NEXT: ---- // + sparse_tensor.print %dyn_4 : tensor<4x4xf64, #CSR> %s1_dyn = tensor.extract_slice %tmp[%c_0, %c_1][4, 4][%c_2, %c_1] : tensor<8x8xf64, #DCSR> to tensor<4x4xf64, #DCSR_SLICE_dyn> %s2_dyn = tensor.extract_slice %b1[%c_0, %c_0][4, 4][%c_2, %c_1] : tensor<8x4xf64, #CSR> to tensor<4x4xf64, #CSR_SLICE_dyn> %dyn_4 = call @matmul_dyn(%s2_dyn, %s1_dyn) : (tensor<4x4xf64, #CSR_SLICE_dyn>, tensor<4x4xf64, #DCSR_SLICE_dyn>) -> tensor<4x4xf64, #CSR> - %c4_dyn = sparse_tensor.convert %dyn_4 : tensor<4x4xf64, #CSR> to tensor<4x4xf64> - %c4u_dyn = tensor.cast %c4_dyn : tensor<4x4xf64> to tensor<*xf64> - call @printMemrefF64(%c4u_dyn) : (tensor<*xf64>) -> () // sparse slices should generate the same result as dense slices // @@ -265,11 +268,6 @@ module { call @printMemrefF64(%du) : (tensor<*xf64>) -> () // Releases resources. - bufferization.dealloc_tensor %c2 : tensor<4x4xf64> - bufferization.dealloc_tensor %c3 : tensor<4x4xf64> - bufferization.dealloc_tensor %c4 : tensor<4x4xf64> - bufferization.dealloc_tensor %c4_coo : tensor<4x4xf64> - bufferization.dealloc_tensor %c4_dyn : tensor<4x4xf64> bufferization.dealloc_tensor %d : tensor<4x4xf64> bufferization.dealloc_tensor %b1 : tensor<8x4xf64, #CSR> bufferization.dealloc_tensor %t1 : tensor<8x8xf64, #CSR> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir index 2cecc242034389..2cef46f4cb1546 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir @@ -10,7 +10,7 @@ // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}" // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}" // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils -// DEFINE: 
%{run_opts} = -e entry -entry-point-result=void +// DEFINE: %{run_opts} = -e main -entry-point-result=void // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs} // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs} // @@ -61,8 +61,6 @@ } module { - func.func private @printMemrefF64(%ptr : tensor<*xf64>) - // Scales a sparse matrix into a new sparse matrix. func.func @matrix_scale(%arga: tensor) -> tensor { %s = arith.constant 2.0 : f64 @@ -129,17 +127,8 @@ module { return %0 : tensor } - // Dump a sparse matrix. - func.func @dump(%arg0: tensor) { - %dm = sparse_tensor.convert %arg0 : tensor to tensor - %u = tensor.cast %dm : tensor to tensor<*xf64> - call @printMemrefF64(%u) : (tensor<*xf64>) -> () - bufferization.dealloc_tensor %dm : tensor - return - } - // Driver method to call and verify matrix kernels. - func.func @entry() { + func.func @main() { %c0 = arith.constant 0 : index %d1 = arith.constant 1.1 : f64 @@ -170,37 +159,76 @@ module { // // Verify the results. 
// - // CHECK: {{\[}}[1, 2, 0, 0, 0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 3], - // CHECK-NEXT: [0, 0, 4, 0, 5, 0, 0, 6], - // CHECK-NEXT: [7, 0, 8, 9, 0, 0, 0, 0]] - // CHECK: {{\[}}[6, 0, 0, 0, 0, 0, 0, 5], - // CHECK-NEXT: [4, 0, 0, 0, 0, 0, 3, 0], - // CHECK-NEXT: [0, 2, 0, 0, 0, 0, 0, 1], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]] - // CHECK: {{\[}}[2, 4, 0, 0, 0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 6], - // CHECK-NEXT: [0, 0, 8, 0, 10, 0, 0, 12], - // CHECK-NEXT: [14, 0, 16, 18, 0, 0, 0, 0]] - // CHECK: {{\[}}[2, 4, 0, 0, 0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 6], - // CHECK-NEXT: [0, 0, 8, 0, 10, 0, 0, 12], - // CHECK-NEXT: [14, 0, 16, 18, 0, 0, 0, 0]] - // CHECK: {{\[}}[8, 4, 0, 0, 0, 0, 0, 5], - // CHECK-NEXT: [4, 0, 0, 0, 0, 0, 3, 6], - // CHECK-NEXT: [0, 2, 8, 0, 10, 0, 0, 13], - // CHECK-NEXT: [14, 0, 16, 18, 0, 0, 0, 0]] - // CHECK: {{\[}}[12, 0, 0, 0, 0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 12], - // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]] + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9 + // CHECK-NEXT: ---- + // + sparse_tensor.print %sm1 : tensor + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 6 + // CHECK-NEXT: pos[0] : ( 0, 3 + // CHECK-NEXT: crd[0] : ( 0, 1, 2 + // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6 + // CHECK-NEXT: crd[1] : ( 0, 7, 0, 6, 1, 7 + // CHECK-NEXT: values : ( 6, 5, 4, 3, 2, 1 + // CHECK-NEXT: ---- + // + sparse_tensor.print %sm2 : tensor + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 2, 
4, 6, 8, 10, 12, 14, 16, 18 + // CHECK-NEXT: ---- + // + sparse_tensor.print %0 : tensor + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 9 + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 2, 3, 6, 9 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 2, 4, 6, 8, 10, 12, 14, 16, 18 + // CHECK-NEXT: ---- + // + sparse_tensor.print %1 : tensor + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 13 + // CHECK-NEXT: pos[0] : ( 0, 4 + // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3 + // CHECK-NEXT: pos[1] : ( 0, 3, 6, 10, 13 + // CHECK-NEXT: crd[1] : ( 0, 1, 7, 0, 6, 7, 1, 2, 4, 7, 0, 2, 3 + // CHECK-NEXT: values : ( 8, 4, 5, 4, 3, 6, 2, 8, 10, 13, 14, 16, 18 + // CHECK-NEXT: ---- + // + sparse_tensor.print %2 : tensor + + // + // CHECK: ---- Sparse Tensor ---- + // CHECK-NEXT: nse = 2 + // CHECK-NEXT: pos[0] : ( 0, 2 + // CHECK-NEXT: crd[0] : ( 0, 2 + // CHECK-NEXT: pos[1] : ( 0, 1, 2 + // CHECK-NEXT: crd[1] : ( 0, 7 + // CHECK-NEXT: values : ( 12, 12 + // CHECK-NEXT: ---- // - call @dump(%sm1) : (tensor) -> () - call @dump(%sm2) : (tensor) -> () - call @dump(%0) : (tensor) -> () - call @dump(%1) : (tensor) -> () - call @dump(%2) : (tensor) -> () - call @dump(%3) : (tensor) -> () + sparse_tensor.print %3 : tensor // Release the resources. 
bufferization.dealloc_tensor %sm1 : tensor diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/Emulated/test-setArmVLBits.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/Emulated/test-setArmVLBits.mlir index 4f46c6e1ebf6a8..aa8d0e4d5104ab 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/Emulated/test-setArmVLBits.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/Emulated/test-setArmVLBits.mlir @@ -8,7 +8,7 @@ func.func @checkVScale() { %vscale = vector.vscale - vector.print str "vscale" + vector.print str "vscale = " vector.print %vscale : index return } @@ -20,28 +20,23 @@ func.func @setAndCheckVL(%bits: i32) { } func.func @main() { - // CHECK: vscale - // CHECK-NEXT: 1 + // CHECK: vscale = 1 %c128 = arith.constant 128 : i32 func.call @setAndCheckVL(%c128) : (i32) -> () - // CHECK: vscale - // CHECK-NEXT: 2 + // CHECK: vscale = 2 %c256 = arith.constant 256 : i32 func.call @setAndCheckVL(%c256) : (i32) -> () - // CHECK: vscale - // CHECK-NEXT: 4 + // CHECK: vscale = 4 %c512 = arith.constant 512 : i32 func.call @setAndCheckVL(%c512) : (i32) -> () - // CHECK: vscale - // CHECK-NEXT: 8 + // CHECK: vscale = 8 %c1024 = arith.constant 1024 : i32 func.call @setAndCheckVL(%c1024) : (i32) -> () - // CHECK: vscale - // CHECK-NEXT: 16 + // CHECK: vscale = 16 %c2048 = arith.constant 2048 : i32 func.call @setAndCheckVL(%c2048) : (i32) -> () diff --git a/mlir/test/mlir-query/function-extraction.mlir b/mlir/test/mlir-query/function-extraction.mlir new file mode 100644 index 00000000000000..a783f65c6761bc --- /dev/null +++ b/mlir/test/mlir-query/function-extraction.mlir @@ -0,0 +1,19 @@ +// RUN: mlir-query %s -c "m hasOpName(\"arith.mulf\").extract(\"testmul\")" | FileCheck %s + +// CHECK: func.func @testmul({{.*}}) -> (f32, f32, f32) { +// CHECK: %[[MUL0:.*]] = arith.mulf {{.*}} : f32 +// CHECK: %[[MUL1:.*]] = arith.mulf {{.*}}, %[[MUL0]] : f32 +// CHECK: %[[MUL2:.*]] = arith.mulf {{.*}} : f32 +// CHECK-NEXT: return %[[MUL0]], %[[MUL1]], 
%[[MUL2]] : f32, f32, f32 + +func.func @mixedOperations(%a: f32, %b: f32, %c: f32) -> f32 { + %sum0 = arith.addf %a, %b : f32 + %sub0 = arith.subf %sum0, %c : f32 + %mul0 = arith.mulf %a, %sub0 : f32 + %sum1 = arith.addf %b, %c : f32 + %mul1 = arith.mulf %sum1, %mul0 : f32 + %sub2 = arith.subf %mul1, %a : f32 + %sum2 = arith.addf %mul1, %b : f32 + %mul2 = arith.mulf %sub2, %sum2 : f32 + return %mul2 : f32 +}