diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 42b7bee0f8c855..c59ee92c6dce37 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -84,6 +84,66 @@ void verifyfeaturestrings(void) {
   (void)__builtin_cpu_supports("avx512vp2intersect");
   (void)__builtin_cpu_supports("f16c");
   (void)__builtin_cpu_supports("avx512fp16");
+  (void)__builtin_cpu_supports("3dnow");
+  (void)__builtin_cpu_supports("adx");
+  (void)__builtin_cpu_supports("cldemote");
+  (void)__builtin_cpu_supports("clflushopt");
+  (void)__builtin_cpu_supports("clwb");
+  (void)__builtin_cpu_supports("clzero");
+  (void)__builtin_cpu_supports("cx16");
+  (void)__builtin_cpu_supports("enqcmd");
+  (void)__builtin_cpu_supports("fsgsbase");
+  (void)__builtin_cpu_supports("lwp");
+  (void)__builtin_cpu_supports("lzcnt");
+  (void)__builtin_cpu_supports("movbe");
+  (void)__builtin_cpu_supports("movdir64b");
+  (void)__builtin_cpu_supports("movdiri");
+  (void)__builtin_cpu_supports("mwaitx");
+  (void)__builtin_cpu_supports("pconfig");
+  (void)__builtin_cpu_supports("pku");
+  (void)__builtin_cpu_supports("prefetchwt1");
+  (void)__builtin_cpu_supports("prfchw");
+  (void)__builtin_cpu_supports("ptwrite");
+  (void)__builtin_cpu_supports("rdpid");
+  (void)__builtin_cpu_supports("rdrnd");
+  (void)__builtin_cpu_supports("rdseed");
+  (void)__builtin_cpu_supports("rtm");
+  (void)__builtin_cpu_supports("serialize");
+  (void)__builtin_cpu_supports("sgx");
+  (void)__builtin_cpu_supports("sha");
+  (void)__builtin_cpu_supports("shstk");
+  (void)__builtin_cpu_supports("tbm");
+  (void)__builtin_cpu_supports("tsxldtrk");
+  (void)__builtin_cpu_supports("vaes");
+  (void)__builtin_cpu_supports("waitpkg");
+  (void)__builtin_cpu_supports("wbnoinvd");
+  (void)__builtin_cpu_supports("xsave");
+  (void)__builtin_cpu_supports("xsavec");
+  (void)__builtin_cpu_supports("xsaveopt");
+  (void)__builtin_cpu_supports("xsaves");
+  (void)__builtin_cpu_supports("amx-tile");
+  (void)__builtin_cpu_supports("amx-int8");
+  (void)__builtin_cpu_supports("amx-bf16");
+  (void)__builtin_cpu_supports("uintr");
+  (void)__builtin_cpu_supports("hreset");
+  (void)__builtin_cpu_supports("kl");
+  (void)__builtin_cpu_supports("widekl");
+  (void)__builtin_cpu_supports("avxvnni");
+  (void)__builtin_cpu_supports("avxifma");
+  (void)__builtin_cpu_supports("avxvnniint8");
+  (void)__builtin_cpu_supports("avxneconvert");
+  (void)__builtin_cpu_supports("cmpccxadd");
+  (void)__builtin_cpu_supports("amx-fp16");
+  (void)__builtin_cpu_supports("prefetchi");
+  (void)__builtin_cpu_supports("raoint");
+  (void)__builtin_cpu_supports("amx-complex");
+  (void)__builtin_cpu_supports("avxvnniint16");
+  (void)__builtin_cpu_supports("sm3");
+  (void)__builtin_cpu_supports("sha512");
+  (void)__builtin_cpu_supports("sm4");
+  (void)__builtin_cpu_supports("usermsr");
+  (void)__builtin_cpu_supports("avx10.1-256");
+  (void)__builtin_cpu_supports("avx10.1-512");
 }
 
 void verifycpustrings(void) {
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 9a40bd6e5a4101..1afa468c4ae8c1 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -139,20 +139,88 @@ enum ProcessorFeatures {
   FEATURE_AVX512BITALG,
   FEATURE_AVX512BF16,
   FEATURE_AVX512VP2INTERSECT,
-
-  FEATURE_CMPXCHG16B = 46,
-  FEATURE_F16C = 49,
+  // FIXME: Some features are missing below when compared with GCC, because
+  // some GCC features do not map one-to-one onto LLVM features.
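+  // The explicit "= N" initializers below skip over the GCC-only features
+  // (shown commented out) so that the enumerator values stay aligned with
+  // GCC's numbering, which the __builtin_cpu_supports feature bits rely on.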
+  FEATURE_3DNOW,
+  // FEATURE_3DNOWP,
+  FEATURE_ADX = 40,
+  // FEATURE_ABM,
+  FEATURE_CLDEMOTE = 42,
+  FEATURE_CLFLUSHOPT,
+  FEATURE_CLWB,
+  FEATURE_CLZERO,
+  FEATURE_CMPXCHG16B,
+  // FIXME: Leaving out FEATURE_CMPXCHG8B is a workaround that keeps 'generic'
+  // a CPU string with no X86_FEATURE_COMPAT features, which the current
+  // implementation of the cpu_specific/cpu_dispatch FMV feature requires.
+  // FEATURE_CMPXCHG8B,
+  FEATURE_ENQCMD = 48,
+  FEATURE_F16C,
+  FEATURE_FSGSBASE,
+  // FEATURE_FXSAVE,
+  // FEATURE_HLE,
+  // FEATURE_IBT,
   FEATURE_LAHF_LM = 54,
   FEATURE_LM,
-  FEATURE_WP,
+  FEATURE_LWP,
   FEATURE_LZCNT,
   FEATURE_MOVBE,
-
-  FEATURE_AVX512FP16 = 94,
+  FEATURE_MOVDIR64B,
+  FEATURE_MOVDIRI,
+  FEATURE_MWAITX,
+  // FEATURE_OSXSAVE,
+  FEATURE_PCONFIG = 63,
+  FEATURE_PKU,
+  FEATURE_PREFETCHWT1,
+  FEATURE_PRFCHW,
+  FEATURE_PTWRITE,
+  FEATURE_RDPID,
+  FEATURE_RDRND,
+  FEATURE_RDSEED,
+  FEATURE_RTM,
+  FEATURE_SERIALIZE,
+  FEATURE_SGX,
+  FEATURE_SHA,
+  FEATURE_SHSTK,
+  FEATURE_TBM,
+  FEATURE_TSXLDTRK,
+  FEATURE_VAES,
+  FEATURE_WAITPKG,
+  FEATURE_WBNOINVD,
+  FEATURE_XSAVE,
+  FEATURE_XSAVEC,
+  FEATURE_XSAVEOPT,
+  FEATURE_XSAVES,
+  FEATURE_AMX_TILE,
+  FEATURE_AMX_INT8,
+  FEATURE_AMX_BF16,
+  FEATURE_UINTR,
+  FEATURE_HRESET,
+  FEATURE_KL,
+  // FEATURE_AESKLE,
+  FEATURE_WIDEKL = 92,
+  FEATURE_AVXVNNI,
+  FEATURE_AVX512FP16,
   FEATURE_X86_64_BASELINE,
   FEATURE_X86_64_V2,
   FEATURE_X86_64_V3,
   FEATURE_X86_64_V4,
+  FEATURE_AVXIFMA,
+  FEATURE_AVXVNNIINT8,
+  FEATURE_AVXNECONVERT,
+  FEATURE_CMPCCXADD,
+  FEATURE_AMX_FP16,
+  FEATURE_PREFETCHI,
+  FEATURE_RAOINT,
+  FEATURE_AMX_COMPLEX,
+  FEATURE_AVXVNNIINT16,
+  FEATURE_SM3,
+  FEATURE_SHA512,
+  FEATURE_SM4,
+  // FEATURE_APXF,
+  FEATURE_USERMSR = 112,
+  FEATURE_AVX10_1_256,
+  FEATURE_AVX10_1_512,
   CPU_FEATURE_MAX
 };
@@ -746,13 +814,15 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_AES);
   if ((ECX >> 29) & 1)
     setFeature(FEATURE_F16C);
+  if ((ECX >> 30) & 1)
+    setFeature(FEATURE_RDRND);
 
   // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
   // indicates that the AVX registers will be saved and restored on context
   // switch, then we have full AVX support.
   const unsigned AVXBits = (1 << 27) | (1 << 28);
-  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
-                ((EAX & 0x6) == 0x6);
+  bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+                    ((EAX & 0x6) == 0x6);
 #if defined(__APPLE__)
   // Darwin lazily saves the AVX512 context on first use: trust that the OS will
   // save the AVX512 context if we use AVX512 instructions, even the bit is not
@@ -760,45 +830,76 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
   bool HasAVX512Save = true;
 #else
   // AVX512 requires additional context to be saved by the OS.
-  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
 #endif
+  // AMX requires additional context to be saved by the OS.
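+  // XCR0 bits 17 and 18 are the AMX TILECFG and TILEDATA state components;
+  // both must be enabled by the OS before the tile registers can be used.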
+ const unsigned AMXBits = (1 << 17) | (1 << 18); + bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); + bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); - if (HasAVX) + if (HasAVXSave) setFeature(FEATURE_AVX); + if (((ECX >> 26) & 1) && HasAVXSave) + setFeature(FEATURE_XSAVE); + bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7 && ((EBX >> 0) & 1)) + setFeature(FEATURE_FSGSBASE); + if (HasLeaf7 && ((EBX >> 2) & 1)) + setFeature(FEATURE_SGX); if (HasLeaf7 && ((EBX >> 3) & 1)) setFeature(FEATURE_BMI); - if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave) setFeature(FEATURE_AVX2); if (HasLeaf7 && ((EBX >> 8) & 1)) setFeature(FEATURE_BMI2); + if (HasLeaf7 && ((EBX >> 11) & 1)) + setFeature(FEATURE_RTM); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512F); if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512DQ); + if (HasLeaf7 && ((EBX >> 18) & 1)) + setFeature(FEATURE_RDSEED); + if (HasLeaf7 && ((EBX >> 19) & 1)) + setFeature(FEATURE_ADX); if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512IFMA); + if (HasLeaf7 && ((EBX >> 24) & 1)) + setFeature(FEATURE_CLWB); if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512PF); if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512ER); if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512CD); + if (HasLeaf7 && ((EBX >> 29) & 1)) + setFeature(FEATURE_SHA); if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512BW); if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VL); + if (HasLeaf7 && ((ECX >> 0) & 1)) + setFeature(FEATURE_PREFETCHWT1); if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 4) & 1)) + setFeature(FEATURE_PKU); + if (HasLeaf7 && ((ECX >> 5) & 1)) + setFeature(FEATURE_WAITPKG); if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 7) & 1)) + setFeature(FEATURE_SHSTK); if (HasLeaf7 && ((ECX >> 8) & 1)) setFeature(FEATURE_GFNI); - if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave) + setFeature(FEATURE_VAES); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave) setFeature(FEATURE_VPCLMULQDQ); if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VNNI); @@ -806,20 +907,98 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_AVX512BITALG); if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VPOPCNTDQ); + if (HasLeaf7 && ((ECX >> 22) & 1)) + setFeature(FEATURE_RDPID); + if (HasLeaf7 && ((ECX >> 23) & 1)) + setFeature(FEATURE_KL); + if (HasLeaf7 && ((ECX >> 25) & 1)) + setFeature(FEATURE_CLDEMOTE); + if (HasLeaf7 && ((ECX >> 27) & 1)) + setFeature(FEATURE_MOVDIRI); + if (HasLeaf7 && ((ECX >> 28) & 1)) + setFeature(FEATURE_MOVDIR64B); + if (HasLeaf7 && ((ECX >> 29) & 1)) + setFeature(FEATURE_ENQCMD); if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) setFeature(FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) setFeature(FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 5) & 1)) + setFeature(FEATURE_UINTR); if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VP2INTERSECT); + if (HasLeaf7 && ((EDX >> 14) & 1)) + setFeature(FEATURE_SERIALIZE); 
+ if (HasLeaf7 && ((EDX >> 16) & 1)) + setFeature(FEATURE_TSXLDTRK); + if (HasLeaf7 && ((EDX >> 18) & 1)) + setFeature(FEATURE_PCONFIG); + if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_BF16); if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512FP16); + if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_TILE); + if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_INT8); + // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't + // return all 0s for invalid subleaves so check the limit. bool HasLeaf7Subleaf1 = - MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + HasLeaf7 && EAX >= 1 && + !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1)) + setFeature(FEATURE_SHA512); + if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1)) + setFeature(FEATURE_SM3); + if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1)) + setFeature(FEATURE_SM4); + if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1)) + setFeature(FEATURE_RAOINT); + if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave) + setFeature(FEATURE_AVXVNNI); if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512BF16); + if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1)) + setFeature(FEATURE_CMPCCXADD); + if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_FP16); + if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1)) + setFeature(FEATURE_HRESET); + if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave) + setFeature(FEATURE_AVXIFMA); + + if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave) + setFeature(FEATURE_AVXVNNIINT8); + if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave) + setFeature(FEATURE_AVXNECONVERT); + if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_COMPLEX); + if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave) + setFeature(FEATURE_AVXVNNIINT16); + if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1)) + setFeature(FEATURE_PREFETCHI); + if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1)) + setFeature(FEATURE_USERMSR); + if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1)) + setFeature(FEATURE_AVX10_1_256); + + unsigned MaxLevel; + getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX); + bool HasLeafD = MaxLevel >= 0xd && + !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave) + setFeature(FEATURE_XSAVEOPT); + if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave) + setFeature(FEATURE_XSAVEC); + if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave) + setFeature(FEATURE_XSAVES); + + bool HasLeaf24 = + MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1)) + setFeature(FEATURE_AVX10_1_512); unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -833,14 +1012,40 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_LZCNT); if (((ECX >> 6) & 1)) setFeature(FEATURE_SSE4_A); + if (((ECX >> 8) & 1)) + setFeature(FEATURE_PRFCHW); if (((ECX >> 11) & 1)) setFeature(FEATURE_XOP); + if (((ECX >> 15) & 1)) + setFeature(FEATURE_LWP); if (((ECX >> 16) & 1)) setFeature(FEATURE_FMA4); + if (((ECX >> 21) & 1)) + setFeature(FEATURE_TBM); + if (((ECX >> 29) & 1)) + setFeature(FEATURE_MWAITX); + if (((EDX >> 29) & 1)) setFeature(FEATURE_LM); } + bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && + !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, 
                                          &ECX, &EDX);
+  if (HasExtLeaf8 && ((EBX >> 0) & 1))
+    setFeature(FEATURE_CLZERO);
+  if (HasExtLeaf8 && ((EBX >> 9) & 1))
+    setFeature(FEATURE_WBNOINVD);
+
+  bool HasLeaf14 = MaxLevel >= 0x14 &&
+                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf14 && ((EBX >> 4) & 1))
+    setFeature(FEATURE_PTWRITE);
+
+  bool HasLeaf19 =
+      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
+    setFeature(FEATURE_WIDEKL);
+
   if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
     setFeature(FEATURE_X86_64_BASELINE);
     if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 43162f2b52ebab..4c630c1eb06e8c 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -173,85 +173,86 @@ X86_FEATURE_COMPAT(AVX512VNNI,      "avx512vnni",             34)
 X86_FEATURE_COMPAT(AVX512BITALG,    "avx512bitalg",           35)
 X86_FEATURE_COMPAT(AVX512BF16,      "avx512bf16",             36)
 X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect",  37)
-// Features below here are not in libgcc/compiler-rt.
-X86_FEATURE       (3DNOW,           "3dnow")
+// Some features are missing below when compared with GCC, because some GCC
+// features do not map one-to-one onto LLVM features.
+X86_FEATURE_COMPAT(3DNOW,           "3dnow",                  0)
 X86_FEATURE       (3DNOWA,          "3dnowa")
+X86_FEATURE_COMPAT(ADX,             "adx",                    0)
 X86_FEATURE       (64BIT,           "64bit")
-X86_FEATURE       (ADX,             "adx")
-X86_FEATURE       (AMX_BF16,        "amx-bf16")
-X86_FEATURE       (AMX_COMPLEX,     "amx-complex")
-X86_FEATURE       (AMX_INT8,        "amx-int8")
-X86_FEATURE       (AMX_TILE,        "amx-tile")
-X86_FEATURE       (CLDEMOTE,        "cldemote")
-X86_FEATURE       (CLFLUSHOPT,      "clflushopt")
-X86_FEATURE       (CLWB,            "clwb")
-X86_FEATURE_COMPAT(F16C,            "f16c",                   38)
-X86_FEATURE       (CLZERO,          "clzero")
-X86_FEATURE       (CMPXCHG16B,      "cx16")
+X86_FEATURE_COMPAT(CLDEMOTE,        "cldemote",               0)
+X86_FEATURE_COMPAT(CLFLUSHOPT,      "clflushopt",             0)
+X86_FEATURE_COMPAT(CLWB,            "clwb",                   0)
+X86_FEATURE_COMPAT(CLZERO,          "clzero",                 0)
+X86_FEATURE_COMPAT(CMPXCHG16B,      "cx16",                   0)
 X86_FEATURE       (CMPXCHG8B,       "cx8")
+X86_FEATURE_COMPAT(ENQCMD,          "enqcmd",                 0)
+X86_FEATURE_COMPAT(F16C,            "f16c",                   0)
+X86_FEATURE_COMPAT(FSGSBASE,        "fsgsbase",               0)
 X86_FEATURE       (CRC32,           "crc32")
-X86_FEATURE       (ENQCMD,          "enqcmd")
-X86_FEATURE       (FSGSBASE,        "fsgsbase")
-X86_FEATURE       (FXSR,            "fxsr")
 X86_FEATURE       (INVPCID,         "invpcid")
-X86_FEATURE       (KL,              "kl")
-X86_FEATURE       (WIDEKL,          "widekl")
-X86_FEATURE       (LWP,             "lwp")
-X86_FEATURE       (LZCNT,           "lzcnt")
-X86_FEATURE       (MOVBE,           "movbe")
-X86_FEATURE       (MOVDIR64B,       "movdir64b")
-X86_FEATURE       (MOVDIRI,         "movdiri")
-X86_FEATURE       (MWAITX,          "mwaitx")
-X86_FEATURE       (PCONFIG,         "pconfig")
-X86_FEATURE       (PKU,             "pku")
-X86_FEATURE       (PREFETCHI,       "prefetchi")
-X86_FEATURE       (PREFETCHWT1,     "prefetchwt1")
-X86_FEATURE       (PRFCHW,          "prfchw")
-X86_FEATURE       (PTWRITE,         "ptwrite")
-X86_FEATURE       (RDPID,           "rdpid")
 X86_FEATURE       (RDPRU,           "rdpru")
-X86_FEATURE       (RDRND,           "rdrnd")
-X86_FEATURE       (RDSEED,          "rdseed")
-X86_FEATURE       (RTM,             "rtm")
 X86_FEATURE       (SAHF,            "sahf")
-X86_FEATURE       (SERIALIZE,       "serialize")
-X86_FEATURE       (SGX,             "sgx")
-X86_FEATURE       (SHA,             "sha")
-X86_FEATURE       (SHSTK,           "shstk")
-X86_FEATURE       (TBM,             "tbm")
-X86_FEATURE       (TSXLDTRK,        "tsxldtrk")
-X86_FEATURE       (UINTR,           "uintr")
-X86_FEATURE       (VAES,            "vaes")
 X86_FEATURE       (VZEROUPPER,      "vzeroupper")
-X86_FEATURE       (WAITPKG,         "waitpkg")
-X86_FEATURE       (WBNOINVD,        "wbnoinvd")
+X86_FEATURE_COMPAT(LWP,             "lwp",                    0)
+X86_FEATURE_COMPAT(LZCNT,           "lzcnt",                  0)
+X86_FEATURE_COMPAT(MOVBE,           "movbe",                  0)
+X86_FEATURE_COMPAT(MOVDIR64B,       "movdir64b",              0)
+X86_FEATURE_COMPAT(MOVDIRI, "movdiri", 0) +X86_FEATURE_COMPAT(MWAITX, "mwaitx", 0) X86_FEATURE (X87, "x87") -X86_FEATURE (XSAVE, "xsave") -X86_FEATURE (XSAVEC, "xsavec") -X86_FEATURE (XSAVEOPT, "xsaveopt") -X86_FEATURE (XSAVES, "xsaves") -X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 39) -X86_FEATURE (HRESET, "hreset") -X86_FEATURE (RAOINT, "raoint") -X86_FEATURE (AMX_FP16, "amx-fp16") -X86_FEATURE (CMPCCXADD, "cmpccxadd") -X86_FEATURE (AVXNECONVERT, "avxneconvert") -X86_FEATURE (AVXVNNI, "avxvnni") -X86_FEATURE (AVXIFMA, "avxifma") -X86_FEATURE (AVXVNNIINT8, "avxvnniint8") -X86_FEATURE (SHA512, "sha512") -X86_FEATURE (SM3, "sm3") -X86_FEATURE (SM4, "sm4") -X86_FEATURE (AVXVNNIINT16, "avxvnniint16") -X86_FEATURE (EVEX512, "evex512") -X86_FEATURE (AVX10_1, "avx10.1-256") -X86_FEATURE (AVX10_1_512, "avx10.1-512") -X86_FEATURE (USERMSR, "usermsr") -X86_FEATURE (EGPR, "egpr") +X86_FEATURE_COMPAT(PCONFIG, "pconfig", 0) +X86_FEATURE_COMPAT(PKU, "pku", 0) +X86_FEATURE_COMPAT(PREFETCHWT1, "prefetchwt1", 0) +X86_FEATURE_COMPAT(PRFCHW, "prfchw", 0) +X86_FEATURE_COMPAT(PTWRITE, "ptwrite", 0) +X86_FEATURE_COMPAT(RDPID, "rdpid", 0) +X86_FEATURE_COMPAT(RDRND, "rdrnd", 0) +X86_FEATURE_COMPAT(RDSEED, "rdseed", 0) +X86_FEATURE_COMPAT(RTM, "rtm", 0) +X86_FEATURE_COMPAT(SERIALIZE, "serialize", 0) +X86_FEATURE_COMPAT(SGX, "sgx", 0) +X86_FEATURE_COMPAT(SHA, "sha", 0) +X86_FEATURE_COMPAT(SHSTK, "shstk", 0) +X86_FEATURE_COMPAT(TBM, "tbm", 0) +X86_FEATURE_COMPAT(TSXLDTRK, "tsxldtrk", 0) +X86_FEATURE_COMPAT(VAES, "vaes", 0) +X86_FEATURE_COMPAT(WAITPKG, "waitpkg", 0) +X86_FEATURE_COMPAT(WBNOINVD, "wbnoinvd", 0) +X86_FEATURE_COMPAT(XSAVE, "xsave", 0) +X86_FEATURE_COMPAT(XSAVEC, "xsavec", 0) +X86_FEATURE_COMPAT(XSAVEOPT, "xsaveopt", 0) +X86_FEATURE_COMPAT(XSAVES, "xsaves", 0) +X86_FEATURE_COMPAT(AMX_TILE, "amx-tile", 0) +X86_FEATURE_COMPAT(AMX_INT8, "amx-int8", 0) +X86_FEATURE_COMPAT(AMX_BF16, "amx-bf16", 0) +X86_FEATURE_COMPAT(UINTR, "uintr", 0) +X86_FEATURE_COMPAT(HRESET, "hreset", 0) +X86_FEATURE_COMPAT(KL, "kl", 0) +X86_FEATURE (FXSR, "fxsr") +X86_FEATURE_COMPAT(WIDEKL, "widekl", 0) +X86_FEATURE_COMPAT(AVXVNNI, "avxvnni", 0) +X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 0) +X86_FEATURE (CCMP, "ccmp") X86_FEATURE (Push2Pop2, "push2pop2") X86_FEATURE (PPX, "ppx") X86_FEATURE (NDD, "ndd") -X86_FEATURE (CCMP, "ccmp") +X86_FEATURE_COMPAT(AVXIFMA, "avxifma", 0) +X86_FEATURE_COMPAT(AVXVNNIINT8, "avxvnniint8", 0) +X86_FEATURE_COMPAT(AVXNECONVERT, "avxneconvert", 0) +X86_FEATURE_COMPAT(CMPCCXADD, "cmpccxadd", 0) +X86_FEATURE_COMPAT(AMX_FP16, "amx-fp16", 0) +X86_FEATURE_COMPAT(PREFETCHI, "prefetchi", 0) +X86_FEATURE_COMPAT(RAOINT, "raoint", 0) +X86_FEATURE_COMPAT(AMX_COMPLEX, "amx-complex", 0) +X86_FEATURE_COMPAT(AVXVNNIINT16, "avxvnniint16", 0) +X86_FEATURE_COMPAT(SM3, "sm3", 0) +X86_FEATURE_COMPAT(SHA512, "sha512", 0) +X86_FEATURE_COMPAT(SM4, "sm4", 0) +X86_FEATURE (EGPR, "egpr") +X86_FEATURE_COMPAT(USERMSR, "usermsr", 0) +X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 0) +X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 0) +X86_FEATURE (EVEX512, "evex512") X86_FEATURE (CF, "cf") // These features aren't really CPU features, but the frontend can set them. 
 X86_FEATURE       (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 472b4a2d439a69..892bfbd62f0d02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1387,10 +1387,8 @@ static MachineMemOperand *getStackAlignedMMO(SDValue StackPtr,
   int FI = cast<FrameIndexSDNode>(StackPtr)->getIndex();
   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   uint64_t ObjectSize = isObjectScalable ? ~UINT64_C(0) : MFI.getObjectSize(FI);
-  MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MachineMemOperand::MOStore, ObjectSize, MFI.getObjectAlign(FI));
-
-  return MMO;
+  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                 ObjectSize, MFI.getObjectAlign(FI));
 }
 
 SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b4bde4c5dd5dd0..eca560f08e22f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -263,6 +263,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other,
                      Expand);
+  if (RV64LegalI32 && Subtarget.is64Bit())
+    setOperationAction(ISD::VAARG, MVT::i32, Promote);
 
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e0864071e44fcb..6dc3df2e72d26b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -430,10 +430,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::CTPOP          , MVT::i8   , Custom);
     setOperationAction(ISD::CTPOP          , MVT::i16  , Custom);
     setOperationAction(ISD::CTPOP          , MVT::i32  , Custom);
-    if (Subtarget.is64Bit())
-      setOperationAction(ISD::CTPOP        , MVT::i64  , Custom);
-    else
-      setOperationAction(ISD::CTPOP        , MVT::i64  , Custom);
+    setOperationAction(ISD::CTPOP          , MVT::i64  , Custom);
   }
 
   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 518fb9d8921647..21f46f576490a8 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -750,13 +750,16 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
 #ifndef NDEBUG
   // Check that priorities are set properly in the .def file. We expect that
   // "compat" features are assigned non-duplicate consecutive priorities
-  // starting from zero (0, 1, ..., num_features - 1).
+  // starting from one (1, ..., 37), with all remaining entries set to zero.
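+  // The helper list built below therefore holds 0, 1, ..., 37 followed by a
+  // zero for every remaining entry.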
 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY,
   unsigned Priorities[] = {
 #include "llvm/TargetParser/X86TargetParser.def"
   };
   std::array<unsigned, std::size(Priorities)> HelperList;
-  std::iota(HelperList.begin(), HelperList.end(), 0);
+  const size_t MaxPriority = 37;
+  std::iota(HelperList.begin(), HelperList.begin() + MaxPriority + 1, 0);
+  for (size_t i = MaxPriority + 1; i != std::size(Priorities); ++i)
+    HelperList[i] = 0;
   assert(std::is_permutation(HelperList.begin(), HelperList.end(),
                              std::begin(Priorities), std::end(Priorities)) &&
          "Priorities don't form consecutive range!");
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index ae8e2292519cb7..97cf5ebe3ca064 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1065,6 +1065,9 @@ void State::addInfoFor(BasicBlock &BB) {
     case Intrinsic::umax:
     case Intrinsic::smin:
     case Intrinsic::smax:
+      // TODO: handle llvm.abs as well
+      WorkList.push_back(
+          FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
       // TODO: Check if it is possible to instead only added the min/max facts
       // when simplifying uses of the min/max intrinsics.
       if (!isGuaranteedNotToBePoison(&I))
@@ -1395,6 +1398,26 @@ static bool checkAndReplaceCondition(
   return false;
 }
 
+static bool checkAndReplaceMinMax(MinMaxIntrinsic *MinMax, ConstraintInfo &Info,
+                                  SmallVectorImpl<Instruction *> &ToRemove) {
+  auto ReplaceMinMaxWithOperand = [&](MinMaxIntrinsic *MinMax, bool UseLHS) {
+    // TODO: generate reproducer for min/max.
+    MinMax->replaceAllUsesWith(MinMax->getOperand(UseLHS ? 0 : 1));
+    ToRemove.push_back(MinMax);
+    return true;
+  };
+
+  ICmpInst::Predicate Pred =
+      ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+  if (auto ImpliedCondition = checkCondition(
+          Pred, MinMax->getOperand(0), MinMax->getOperand(1), MinMax, Info))
+    return ReplaceMinMaxWithOperand(MinMax, *ImpliedCondition);
+  if (auto ImpliedCondition = checkCondition(
+          Pred, MinMax->getOperand(1), MinMax->getOperand(0), MinMax, Info))
+    return ReplaceMinMaxWithOperand(MinMax, !*ImpliedCondition);
+  return false;
+}
+
 static void removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
                                  Module *ReproducerModule,
@@ -1695,6 +1718,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
                                               ReproducerCondStack, DFSInStack);
       }
       Changed |= Simplified;
+    } else if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Inst)) {
+      Changed |= checkAndReplaceMinMax(MinMax, Info, ToRemove);
     }
     continue;
   }
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/vararg.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/vararg.ll
new file mode 100644
index 00000000000000..2fb674f5608046
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/vararg.ll
@@ -0,0 +1,1391 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -verify-machineinstrs \
+; RUN:   -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d -target-abi lp64f \
+; RUN:   -verify-machineinstrs -riscv-experimental-rv64-legal-i32 \
+; RUN:   | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d -target-abi lp64d \
+; RUN:   -verify-machineinstrs -riscv-experimental-rv64-legal-i32 \
+; RUN:   | FileCheck -check-prefix=LP64-LP64F-LP64D-FPELIM %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -verify-machineinstrs
-frame-pointer=all \ +; RUN: -riscv-experimental-rv64-legal-i32 | FileCheck -check-prefix=LP64-LP64F-LP64D-WITHFP %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -target-abi lp64e \ +; RUN: -verify-machineinstrs -riscv-experimental-rv64-legal-i32 \ +; RUN: | FileCheck -check-prefix=LP64E-FPELIM %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -target-abi lp64e -frame-pointer=all \ +; RUN: -verify-machineinstrs -riscv-experimental-rv64-legal-i32 \ +; RUN: | FileCheck -check-prefix=LP64E-WITHFP %s + +; The same vararg calling convention is used for ilp32/ilp32f/ilp32d and for +; lp64/lp64f/lp64d. Different CHECK lines are required for RV32D due to slight +; codegen differences due to the way the f64 load operations are lowered. +; The nounwind attribute is omitted for some of the tests, to check that CFI +; directives are correctly generated. + +declare void @llvm.va_start(ptr) +declare void @llvm.va_end(ptr) + +declare void @notdead(ptr) + +; Although frontends are recommended to not generate va_arg due to the lack of +; support for aggregate types, we test simple cases here to ensure they are +; lowered correctly + +define i32 @va1(ptr %fmt, ...) { +; LP64-LP64F-LP64D-FPELIM-LABEL: va1: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 80 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 28 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va1: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa_offset 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_offset ra, -72 +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_offset s0, -80 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa s0, 64 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 28 +; 
LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: .cfi_offset ra, -56 +; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 12 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4 + store ptr %argp.next, ptr %va, align 4 + %1 = load i32, ptr %argp.cur, align 4 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +define i32 @va1_va_arg(ptr %fmt, ...) nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va1_va_arg: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 32 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_va_arg: +; LP64E-WITHFP: # %bb.0: +; 
LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 16 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +; Ensure the adjustment when restoring the stack pointer using the frame +; pointer is correct +define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va1_va_arg_alloca: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv s1, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, s0, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, -32(s0) +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a1, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: srli a0, a0, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 15 +; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sub a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: call notdead +; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, s1 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, s0, -32 +; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va1_va_arg_alloca: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv s1, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a1, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: srli a0, a0, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 15 +; 
LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sub a0, sp, a0 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv sp, a0 +; LP64-LP64F-LP64D-WITHFP-NEXT: call notdead +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, s1 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, s0, -32 +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_va_arg_alloca: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -80 +; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: addi s0, sp, 32 +; LP64E-FPELIM-NEXT: mv s1, a1 +; LP64E-FPELIM-NEXT: sd a5, 40(s0) +; LP64E-FPELIM-NEXT: sd a4, 32(s0) +; LP64E-FPELIM-NEXT: sd a3, 24(s0) +; LP64E-FPELIM-NEXT: sd a2, 16(s0) +; LP64E-FPELIM-NEXT: sd a1, 8(s0) +; LP64E-FPELIM-NEXT: addi a0, s0, 16 +; LP64E-FPELIM-NEXT: sd a0, -32(s0) +; LP64E-FPELIM-NEXT: slli a0, a1, 32 +; LP64E-FPELIM-NEXT: srli a0, a0, 32 +; LP64E-FPELIM-NEXT: addi a0, a0, 7 +; LP64E-FPELIM-NEXT: andi a0, a0, -8 +; LP64E-FPELIM-NEXT: sub a0, sp, a0 +; LP64E-FPELIM-NEXT: mv sp, a0 +; LP64E-FPELIM-NEXT: call notdead +; LP64E-FPELIM-NEXT: mv a0, s1 +; LP64E-FPELIM-NEXT: addi sp, s0, -32 +; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 80 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_va_arg_alloca: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv s1, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 16 +; LP64E-WITHFP-NEXT: sd a0, -32(s0) +; LP64E-WITHFP-NEXT: slli a0, a1, 32 +; LP64E-WITHFP-NEXT: srli a0, a0, 32 +; LP64E-WITHFP-NEXT: addi a0, a0, 7 +; LP64E-WITHFP-NEXT: andi a0, a0, -8 +; LP64E-WITHFP-NEXT: sub a0, sp, a0 +; LP64E-WITHFP-NEXT: mv sp, a0 +; LP64E-WITHFP-NEXT: call notdead +; LP64E-WITHFP-NEXT: mv a0, s1 +; LP64E-WITHFP-NEXT: addi sp, s0, -32 +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + %2 = alloca i8, i32 %1 + call void @notdead(ptr %2) + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +define void @va1_caller() nounwind { +; Pass a double, as a float would be promoted by a C/C++ frontend +; LP64-LP64F-LP64D-FPELIM-LABEL: va1_caller: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 1023 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 52 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a2, 2 +; 
LP64-LP64F-LP64D-FPELIM-NEXT: call va1 +; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va1_caller: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 1023 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 52 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a2, 2 +; LP64-LP64F-LP64D-WITHFP-NEXT: call va1 +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va1_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a1, 1023 +; LP64E-FPELIM-NEXT: slli a1, a1, 52 +; LP64E-FPELIM-NEXT: li a2, 2 +; LP64E-FPELIM-NEXT: call va1 +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va1_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a1, 1023 +; LP64E-WITHFP-NEXT: slli a1, a1, 52 +; LP64E-WITHFP-NEXT: li a2, 2 +; LP64E-WITHFP-NEXT: call va1 +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret + %1 = call i32 (ptr, ...) @va1(ptr undef, double 1.0, i32 2) + ret void +} + +; Ensure that 2x xlen size+alignment varargs are accessed via an "aligned" +; register pair (where the first register is even-numbered). + +define i64 @va2(ptr %fmt, ...) 
nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va2: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 39 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va2: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 23 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 39 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 23 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va + %ptrint = ptrtoint ptr %argp.cur to iXLen + %1 = add iXLen %ptrint, 7 + %2 = and iXLen %1, -8 + %argp.cur.aligned = inttoptr iXLen %1 to ptr + %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i32 8 + store ptr %argp.next, ptr %va + %3 = inttoptr iXLen %2 to ptr + %4 = load double, ptr %3, align 8 + %5 = bitcast double %4 to i64 + call void @llvm.va_end(ptr %va) + ret i64 %5 +} + +define i64 @va2_va_arg(ptr %fmt, ...) 
nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va2_va_arg: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 32 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va2_va_arg: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 32 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 16 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, double + call void @llvm.va_end(ptr %va) + %2 = bitcast double %1 to i64 + ret i64 %2 +} + +define void @va2_caller() nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va2_caller: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 1023 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a1, a1, 52 +; LP64-LP64F-LP64D-FPELIM-NEXT: call va2 +; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 16 
+; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va2_caller: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 1023 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a1, a1, 52 +; LP64-LP64F-LP64D-WITHFP-NEXT: call va2 +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va2_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a1, 1023 +; LP64E-FPELIM-NEXT: slli a1, a1, 52 +; LP64E-FPELIM-NEXT: call va2 +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va2_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a1, 1023 +; LP64E-WITHFP-NEXT: slli a1, a1, 52 +; LP64E-WITHFP-NEXT: call va2 +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret + %1 = call i64 (ptr, ...) @va2(ptr undef, double 1.000000e+00) + ret void +} + +; On RV32, Ensure a named 2*xlen argument is passed in a1 and a2, while the +; vararg double is passed in a4 and a5 (rather than a3 and a4) + +define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va3: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 31 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 64 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va3: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -48 +; LP64E-FPELIM-NEXT: sd a5, 40(sp) +; LP64E-FPELIM-NEXT: sd a4, 32(sp) +; LP64E-FPELIM-NEXT: sd a3, 24(sp) +; LP64E-FPELIM-NEXT: sd a2, 16(sp) +; LP64E-FPELIM-NEXT: addi a3, sp, 31 +; LP64E-FPELIM-NEXT: add a0, a1, a2 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 48 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: addi a3, s0, 15 +; LP64E-WITHFP-NEXT: add a0, a1, a2 +; LP64E-WITHFP-NEXT: sd a3, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va + %ptrint = ptrtoint ptr %argp.cur to iXLen + %1 = add iXLen %ptrint, 7 + %2 = and iXLen %1, -8 + %argp.cur.aligned = inttoptr iXLen %1 to ptr + %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i32 8 + store ptr %argp.next, ptr %va + %3 = inttoptr iXLen %2 to ptr + %4 = load double, ptr %3, align 8 + call void @llvm.va_end(ptr %va) + %5 = bitcast double %4 to i64 + %6 = add i64 %b, %5 + ret i64 %6 +} + +define i64 @va3_va_arg(i32 %a, i64 %b, ...) 
nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 24 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 64 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va3_va_arg: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3_va_arg: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -48 +; LP64E-FPELIM-NEXT: sd a5, 40(sp) +; LP64E-FPELIM-NEXT: sd a4, 32(sp) +; LP64E-FPELIM-NEXT: sd a3, 24(sp) +; LP64E-FPELIM-NEXT: sd a2, 16(sp) +; LP64E-FPELIM-NEXT: addi a3, sp, 24 +; LP64E-FPELIM-NEXT: add a0, a1, a2 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 48 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3_va_arg: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 24(s0) +; LP64E-WITHFP-NEXT: sd a4, 16(s0) +; LP64E-WITHFP-NEXT: sd a3, 8(s0) +; LP64E-WITHFP-NEXT: sd a2, 0(s0) +; LP64E-WITHFP-NEXT: addi a3, s0, 8 +; LP64E-WITHFP-NEXT: add a0, a1, a2 +; LP64E-WITHFP-NEXT: sd a3, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, double + call void @llvm.va_end(ptr %va) + %2 = bitcast double %1 to i64 + %3 = add i64 %b, %2 + ret i64 %3 +} + +define void @va3_caller() nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va3_caller: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: li a2, 1 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a2, a2, 62 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 2 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 1111 +; LP64-LP64F-LP64D-FPELIM-NEXT: call va3 +; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; 
LP64-LP64F-LP64D-WITHFP-LABEL: va3_caller: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a2, 1 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a2, a2, 62 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 2 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 1111 +; LP64-LP64F-LP64D-WITHFP-NEXT: call va3 +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va3_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -8 +; LP64E-FPELIM-NEXT: sd ra, 0(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a2, 1 +; LP64E-FPELIM-NEXT: slli a2, a2, 62 +; LP64E-FPELIM-NEXT: li a0, 2 +; LP64E-FPELIM-NEXT: li a1, 1111 +; LP64E-FPELIM-NEXT: call va3 +; LP64E-FPELIM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 8 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va3_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -16 +; LP64E-WITHFP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 16 +; LP64E-WITHFP-NEXT: li a2, 1 +; LP64E-WITHFP-NEXT: slli a2, a2, 62 +; LP64E-WITHFP-NEXT: li a0, 2 +; LP64E-WITHFP-NEXT: li a1, 1111 +; LP64E-WITHFP-NEXT: call va3 +; LP64E-WITHFP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 16 +; LP64E-WITHFP-NEXT: ret + %1 = call i64 (i32, i64, ...) @va3(i32 2, i64 1111, double 2.000000e+00) + ret void +} + +declare void @llvm.va_copy(ptr, ptr) + +define i32 @va4_va_copy(i32 %argno, ...) 
nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va4_va_copy: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: mv s0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 88(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 80(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 48 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 0(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: call notdead +; LP64-LP64F-LP64D-FPELIM-NEXT: ld a0, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 3 +; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -4 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, a0, 8 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: lw a1, 0(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 11 +; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -4 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a2, a0, 8 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: lw a2, 0(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, a0, 11 +; LP64-LP64F-LP64D-FPELIM-NEXT: andi a0, a0, -4 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, a0, 8 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 0(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, a1, s0 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, a1, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: addw a0, a1, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va4_va_copy: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -112 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv s1, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: call notdead +; LP64-LP64F-LP64D-WITHFP-NEXT: ld a0, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 3 +; LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -4 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, a0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lw a1, 0(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 11 +; LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -4 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a2, a0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lw a2, 0(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, a0, 11 +; LP64-LP64F-LP64D-WITHFP-NEXT: andi a0, a0, -4 
+; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, a0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 0(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: add a1, a1, s1 +; LP64-LP64F-LP64D-WITHFP-NEXT: add a1, a1, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: addw a0, a1, a0 +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 112 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va4_va_copy: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -80 +; LP64E-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: mv s0, a1 +; LP64E-FPELIM-NEXT: sd a5, 72(sp) +; LP64E-FPELIM-NEXT: sd a4, 64(sp) +; LP64E-FPELIM-NEXT: sd a3, 56(sp) +; LP64E-FPELIM-NEXT: sd a2, 48(sp) +; LP64E-FPELIM-NEXT: sd a1, 40(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 48 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: sd a0, 0(sp) +; LP64E-FPELIM-NEXT: call notdead +; LP64E-FPELIM-NEXT: ld a0, 8(sp) +; LP64E-FPELIM-NEXT: addi a0, a0, 3 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a1, a0, 8 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: lw a1, 0(a0) +; LP64E-FPELIM-NEXT: addi a0, a0, 11 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a2, a0, 8 +; LP64E-FPELIM-NEXT: sd a2, 8(sp) +; LP64E-FPELIM-NEXT: lw a2, 0(a0) +; LP64E-FPELIM-NEXT: addi a0, a0, 11 +; LP64E-FPELIM-NEXT: andi a0, a0, -4 +; LP64E-FPELIM-NEXT: addi a3, a0, 8 +; LP64E-FPELIM-NEXT: sd a3, 8(sp) +; LP64E-FPELIM-NEXT: lw a0, 0(a0) +; LP64E-FPELIM-NEXT: add a1, a1, s0 +; LP64E-FPELIM-NEXT: add a1, a1, a2 +; LP64E-FPELIM-NEXT: addw a0, a1, a0 +; LP64E-FPELIM-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 80 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va4_va_copy: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -88 +; LP64E-WITHFP-NEXT: sd ra, 32(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s1, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 40 +; LP64E-WITHFP-NEXT: mv s1, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 16 +; LP64E-WITHFP-NEXT: sd a0, -32(s0) +; LP64E-WITHFP-NEXT: sd a0, -40(s0) +; LP64E-WITHFP-NEXT: call notdead +; LP64E-WITHFP-NEXT: ld a0, -32(s0) +; LP64E-WITHFP-NEXT: addi a0, a0, 3 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a1, a0, 8 +; LP64E-WITHFP-NEXT: sd a1, -32(s0) +; LP64E-WITHFP-NEXT: lw a1, 0(a0) +; LP64E-WITHFP-NEXT: addi a0, a0, 11 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a2, a0, 8 +; LP64E-WITHFP-NEXT: sd a2, -32(s0) +; LP64E-WITHFP-NEXT: lw a2, 0(a0) +; LP64E-WITHFP-NEXT: addi a0, a0, 11 +; LP64E-WITHFP-NEXT: andi a0, a0, -4 +; LP64E-WITHFP-NEXT: addi a3, a0, 8 +; LP64E-WITHFP-NEXT: sd a3, -32(s0) +; LP64E-WITHFP-NEXT: lw a0, 0(a0) +; LP64E-WITHFP-NEXT: add a1, a1, s1 +; LP64E-WITHFP-NEXT: add a1, a1, a2 +; LP64E-WITHFP-NEXT: addw a0, a1, a0 +; LP64E-WITHFP-NEXT: ld ra, 32(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: 
ld s1, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 88 +; LP64E-WITHFP-NEXT: ret + %vargs = alloca ptr + %wargs = alloca ptr + call void @llvm.va_start(ptr %vargs) + %1 = va_arg ptr %vargs, i32 + call void @llvm.va_copy(ptr %wargs, ptr %vargs) + %2 = load ptr, ptr %wargs, align 4 + call void @notdead(ptr %2) + %3 = va_arg ptr %vargs, i32 + %4 = va_arg ptr %vargs, i32 + %5 = va_arg ptr %vargs, i32 + call void @llvm.va_end(ptr %vargs) + call void @llvm.va_end(ptr %wargs) + %add1 = add i32 %3, %1 + %add2 = add i32 %add1, %4 + %add3 = add i32 %add2, %5 + ret i32 %add3 +} + +; Check 2x*xlen values are aligned appropriately when passed on the stack in a vararg call + +declare i32 @va5_aligned_stack_callee(i32, ...) + +define void @va5_aligned_stack_caller() nounwind { +; The double should be 8-byte aligned on the stack, but the two-element array +; should only be 4-byte aligned +; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 17 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 15 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_0) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld t0, %lo(.LCPI11_0)(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 2384 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a6, a0, 761 +; LP64-LP64F-LP64D-FPELIM-NEXT: slli a6, a6, 11 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 1 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 11 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a4, 12 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a5, 13 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a7, 14 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 0(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: call va5_aligned_stack_callee +; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 48 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va5_aligned_stack_caller: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -48 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 17 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 24(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 16(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld t0, %lo(.LCPI11_0)(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 2384 +; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a6, a0, 761 +; LP64-LP64F-LP64D-WITHFP-NEXT: slli a6, a6, 11 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 1 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 11 +; 
LP64-LP64F-LP64D-WITHFP-NEXT: li a4, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a5, 13 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a7, 14 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 0(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: call va5_aligned_stack_callee +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 48 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va5_aligned_stack_caller: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -56 +; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill +; LP64E-FPELIM-NEXT: li a0, 17 +; LP64E-FPELIM-NEXT: sd a0, 40(sp) +; LP64E-FPELIM-NEXT: li a0, 16 +; LP64E-FPELIM-NEXT: lui a1, %hi(.LCPI11_0) +; LP64E-FPELIM-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; LP64E-FPELIM-NEXT: sd a0, 32(sp) +; LP64E-FPELIM-NEXT: li a0, 15 +; LP64E-FPELIM-NEXT: sd a0, 24(sp) +; LP64E-FPELIM-NEXT: sd a1, 16(sp) +; LP64E-FPELIM-NEXT: li a0, 14 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lui a0, 2384 +; LP64E-FPELIM-NEXT: addiw a0, a0, 761 +; LP64E-FPELIM-NEXT: slli a6, a0, 11 +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_1) +; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_2) +; LP64E-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64E-FPELIM-NEXT: li a0, 1 +; LP64E-FPELIM-NEXT: li a1, 11 +; LP64E-FPELIM-NEXT: li a4, 12 +; LP64E-FPELIM-NEXT: li a5, 13 +; LP64E-FPELIM-NEXT: sd a6, 0(sp) +; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee +; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload +; LP64E-FPELIM-NEXT: addi sp, sp, 56 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va5_aligned_stack_caller: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -64 +; LP64E-WITHFP-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 64 +; LP64E-WITHFP-NEXT: li a0, 17 +; LP64E-WITHFP-NEXT: sd a0, 40(sp) +; LP64E-WITHFP-NEXT: li a0, 16 +; LP64E-WITHFP-NEXT: lui a1, %hi(.LCPI11_0) +; LP64E-WITHFP-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; LP64E-WITHFP-NEXT: sd a0, 32(sp) +; LP64E-WITHFP-NEXT: li a0, 15 +; LP64E-WITHFP-NEXT: sd a0, 24(sp) +; LP64E-WITHFP-NEXT: sd a1, 16(sp) +; LP64E-WITHFP-NEXT: li a0, 14 +; LP64E-WITHFP-NEXT: sd a0, 8(sp) +; LP64E-WITHFP-NEXT: lui a0, 2384 +; LP64E-WITHFP-NEXT: addiw a0, a0, 761 +; LP64E-WITHFP-NEXT: slli a6, a0, 11 +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_1) +; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0) +; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_2) +; LP64E-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a0) +; LP64E-WITHFP-NEXT: li a0, 1 +; LP64E-WITHFP-NEXT: li a1, 11 +; LP64E-WITHFP-NEXT: li a4, 12 +; LP64E-WITHFP-NEXT: li a5, 13 +; LP64E-WITHFP-NEXT: sd a6, 0(sp) +; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee +; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 64 +; LP64E-WITHFP-NEXT: ret + %1 = call i32 (i32, ...) @va5_aligned_stack_callee(i32 1, i32 11, + fp128 0xLEB851EB851EB851F400091EB851EB851, i32 12, i32 13, i64 20000000000, + i32 14, double 2.720000e+00, i32 15, [2 x i32] [i32 16, i32 17]) + ret void +} + +; A function with no fixed arguments is not valid C, but can be +; specified in LLVM IR. We must ensure the vararg save area is +; still set up correctly. + +define i32 @va6_no_fixed_args(...) 
nounwind { +; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 24 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va6_no_fixed_args: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -96 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 0(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va6_no_fixed_args: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: addi sp, sp, -64 +; LP64E-FPELIM-NEXT: sd a5, 56(sp) +; LP64E-FPELIM-NEXT: sd a4, 48(sp) +; LP64E-FPELIM-NEXT: sd a3, 40(sp) +; LP64E-FPELIM-NEXT: sd a2, 32(sp) +; LP64E-FPELIM-NEXT: sd a1, 24(sp) +; LP64E-FPELIM-NEXT: sd a0, 16(sp) +; LP64E-FPELIM-NEXT: addi a1, sp, 24 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: addi sp, sp, 64 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va6_no_fixed_args: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -80 +; LP64E-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: addi s0, sp, 32 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: sd a0, 0(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 8 +; LP64E-WITHFP-NEXT: sd a1, -24(s0) +; LP64E-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 80 +; LP64E-WITHFP-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +; TODO: improve constant materialization of stack addresses + +define i32 @va_large_stack(ptr %fmt, ...) 
{ +; LP64-LP64F-LP64D-FPELIM-LABEL: va_large_stack: +; LP64-LP64F-LP64D-FPELIM: # %bb.0: +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 336 +; LP64-LP64F-LP64D-FPELIM-NEXT: sub sp, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 100000080 +; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: lui t0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add t0, sp, t0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(t0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a7, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a7, sp, a7 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a7) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a6, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a6, sp, a6 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 312(a6) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a5, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a5, sp, a5 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a5) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a4, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a4, sp, a4 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a4) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a3, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a3, sp, a3 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 288(a3) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a2, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a2, sp, a2 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 280(a2) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a1, a1, 284 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a1, a1, 336 +; LP64-LP64F-LP64D-FPELIM-NEXT: add sp, sp, a1 +; LP64-LP64F-LP64D-FPELIM-NEXT: ret +; +; LP64-LP64F-LP64D-WITHFP-LABEL: va_large_stack: +; LP64-LP64F-LP64D-WITHFP: # %bb.0: +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, -2032 +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa_offset 2032 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 1960(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 1952(sp) # 8-byte Folded Spill +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_offset ra, -72 +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_offset s0, -80 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 1968 +; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa s0, 64 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, -1680 +; LP64-LP64F-LP64D-WITHFP-NEXT: sub sp, sp, a0 +; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a2, 24414 +; LP64-LP64F-LP64D-WITHFP-NEXT: sub a2, s0, a2 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -288(a2) +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414 +; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a1, a1, -1680 +; LP64-LP64F-LP64D-WITHFP-NEXT: add sp, sp, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 1960(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 1952(sp) # 8-byte Folded Reload +; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 2032 +; LP64-LP64F-LP64D-WITHFP-NEXT: ret +; +; LP64E-FPELIM-LABEL: va_large_stack: +; LP64E-FPELIM: # %bb.0: +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: addiw a0, a0, 320 +; LP64E-FPELIM-NEXT: sub sp, sp, a0 +; 
LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064 +; LP64E-FPELIM-NEXT: mv a0, a1 +; LP64E-FPELIM-NEXT: lui a6, 24414 +; LP64E-FPELIM-NEXT: add a6, sp, a6 +; LP64E-FPELIM-NEXT: sd a5, 312(a6) +; LP64E-FPELIM-NEXT: lui a5, 24414 +; LP64E-FPELIM-NEXT: add a5, sp, a5 +; LP64E-FPELIM-NEXT: sd a4, 304(a5) +; LP64E-FPELIM-NEXT: lui a4, 24414 +; LP64E-FPELIM-NEXT: add a4, sp, a4 +; LP64E-FPELIM-NEXT: sd a3, 296(a4) +; LP64E-FPELIM-NEXT: lui a3, 24414 +; LP64E-FPELIM-NEXT: add a3, sp, a3 +; LP64E-FPELIM-NEXT: sd a2, 288(a3) +; LP64E-FPELIM-NEXT: lui a2, 24414 +; LP64E-FPELIM-NEXT: add a2, sp, a2 +; LP64E-FPELIM-NEXT: sd a1, 280(a2) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: addiw a1, a1, 284 +; LP64E-FPELIM-NEXT: add a1, sp, a1 +; LP64E-FPELIM-NEXT: sd a1, 8(sp) +; LP64E-FPELIM-NEXT: lui a1, 24414 +; LP64E-FPELIM-NEXT: addiw a1, a1, 320 +; LP64E-FPELIM-NEXT: add sp, sp, a1 +; LP64E-FPELIM-NEXT: ret +; +; LP64E-WITHFP-LABEL: va_large_stack: +; LP64E-WITHFP: # %bb.0: +; LP64E-WITHFP-NEXT: addi sp, sp, -2040 +; LP64E-WITHFP-NEXT: .cfi_def_cfa_offset 2040 +; LP64E-WITHFP-NEXT: sd ra, 1984(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: sd s0, 1976(sp) # 8-byte Folded Spill +; LP64E-WITHFP-NEXT: .cfi_offset ra, -56 +; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 +; LP64E-WITHFP-NEXT: addi s0, sp, 1992 +; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 +; LP64E-WITHFP-NEXT: lui a0, 24414 +; LP64E-WITHFP-NEXT: addiw a0, a0, -1704 +; LP64E-WITHFP-NEXT: sub sp, sp, a0 +; LP64E-WITHFP-NEXT: mv a0, a1 +; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: sd a3, 24(s0) +; LP64E-WITHFP-NEXT: sd a2, 16(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) +; LP64E-WITHFP-NEXT: addi a1, s0, 12 +; LP64E-WITHFP-NEXT: lui a2, 24414 +; LP64E-WITHFP-NEXT: sub a2, s0, a2 +; LP64E-WITHFP-NEXT: sd a1, -288(a2) +; LP64E-WITHFP-NEXT: lui a1, 24414 +; LP64E-WITHFP-NEXT: addiw a1, a1, -1704 +; LP64E-WITHFP-NEXT: add sp, sp, a1 +; LP64E-WITHFP-NEXT: ld ra, 1984(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: ld s0, 1976(sp) # 8-byte Folded Reload +; LP64E-WITHFP-NEXT: addi sp, sp, 2040 +; LP64E-WITHFP-NEXT: ret + %large = alloca [ 100000000 x i8 ] + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4 + store ptr %argp.next, ptr %va, align 4 + %1 = load i32, ptr %argp.cur, align 4 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} diff --git a/llvm/test/Transforms/ConstraintElimination/minmax.ll b/llvm/test/Transforms/ConstraintElimination/minmax.ll index 82b932f14c4ffa..68513ea10ad0fe 100644 --- a/llvm/test/Transforms/ConstraintElimination/minmax.ll +++ b/llvm/test/Transforms/ConstraintElimination/minmax.ll @@ -343,6 +343,264 @@ end: ret i32 0 } +; Test from PR75155 +define i32 @simplify_slt_smax_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_slt_smax_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[B]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_slt_smax_val_commuted(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 
@simplify_slt_smax_val_commuted +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[B]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %add, i32 %b) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_slt_smax_val_at_use(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_slt_smax_val_at_use +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 [[ADD]]) +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: ret i32 [[MAX]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp slt i32 %a, %b + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + br i1 %cmp, label %then, label %else +then: + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_sgt_smax_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_sgt_smax_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[ADD]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp sgt i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_sle_smax_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_sle_smax_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 [[ADD]]) +; CHECK-NEXT: ret i32 [[MAX]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp sle i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_sge_smax_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_sge_smax_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[ADD]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp sge i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_ult_umax_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_ult_umax_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], 
label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[B]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp ult i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nuw i32 %a, 1 + %max = call i32 @llvm.umax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_slt_smin_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_slt_smin_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[ADD]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smin.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_ult_umin_val(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_ult_umin_val +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[A]], 1 +; CHECK-NEXT: ret i32 [[ADD]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp ult i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nuw i32 %a, 1 + %max = call i32 @llvm.umin.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_slt_smax_val_fail1(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_slt_smax_val_fail1 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 2 +; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 [[ADD]]) +; CHECK-NEXT: ret i32 [[MAX]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 2 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + +define i32 @simplify_ult_smax_val_fail2(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @simplify_ult_smax_val_fail2 +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 [[ADD]]) +; CHECK-NEXT: ret i32 [[MAX]] +; CHECK: else: +; CHECK-NEXT: ret i32 -1 +; +start: + %cmp = icmp ult i32 %a, %b + br i1 %cmp, label %then, label %else +then: + %add = add nsw i32 %a, 1 + %max = call i32 @llvm.smax.i32(i32 %b, i32 %add) + ret i32 %max +else: + ret i32 -1 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) diff --git a/mlir/include/mlir/Conversion/SCFToEmitC/SCFToEmitC.h b/mlir/include/mlir/Conversion/SCFToEmitC/SCFToEmitC.h index 7ff682293f48bf..22df7f1c5dcf29 100644 --- a/mlir/include/mlir/Conversion/SCFToEmitC/SCFToEmitC.h +++ b/mlir/include/mlir/Conversion/SCFToEmitC/SCFToEmitC.h @@ -20,10 +20,6 @@ class 
RewritePatternSet;

/// Collect a set of patterns to convert SCF operations to the EmitC dialect.
void populateSCFToEmitCConversionPatterns(RewritePatternSet &patterns);
-
-/// Creates a pass to convert SCF operations to the EmitC dialect.
-std::unique_ptr<Pass> createConvertSCFToEmitCPass();
-
} // namespace mlir

#endif // MLIR_CONVERSION_SCFTOEMITC_SCFTOEMITC_H
diff --git a/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp b/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp
index bf69ba503f4e6b..367142a5207427 100644
--- a/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp
+++ b/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp
@@ -199,7 +199,3 @@ void SCFToEmitCPass::runOnOperation() {
           applyPartialConversion(getOperation(), target, std::move(patterns))))
     signalPassFailure();
 }
-
-std::unique_ptr<Pass> mlir::createConvertSCFToEmitCPass() {
-  return std::make_unique<SCFToEmitCPass>();
-}
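
Note on the ConstraintElimination tests above: they exercise a fold where a dominating
signed or unsigned compare lets an smax/umax/smin/umin call collapse to one of its
operands. A minimal C analogue, as a sketch only (the pass operates on LLVM IR; the
function name and the ternary encoding of smax here are illustrative, not part of the
patch):

/* Sketch of the reasoning behind simplify_slt_smax_val: once a < b is
 * known, a + 1 <= b also holds, and a + 1 cannot overflow signed
 * arithmetic (a < b <= INT_MAX), matching the `nsw` flag in the IR.
 * The max therefore always yields b. */
int smax_after_slt(int a, int b) {
  if (a < b) {
    int add = a + 1;          /* no signed overflow: a < b <= INT_MAX */
    return add > b ? add : b; /* smax(b, a + 1) always folds to b */
  }
  return -1;
}

The _fail variants cover the guards: with an offset of 2, a + 2 can exceed b, and with an
unsigned compare feeding a signed smax the bound does not transfer, so those calls must stay.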