diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c48d6c4adf6151..f85c1296cdce85 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -987,6 +987,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_Convert_StrictFP(SDNode *N); SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); + SDValue WidenVecRes_XRINT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_IS_FPCLASS(SDNode *N); SDValue WidenVecRes_ExpOp(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 02d9ce4f0a44d4..a1a9f0f0615cbc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4204,6 +4204,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_FP_TO_XINT_SAT(N); break; + case ISD::LRINT: + case ISD::LLRINT: + Res = WidenVecRes_XRINT(N); + break; + case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -4216,8 +4221,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FRINT: - case ISD::LRINT: - case ISD::LLRINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: @@ -4791,6 +4794,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1)); } +SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) { + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ElementCount WidenNumElts = WidenVT.getVectorElementCount(); + + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + + // Also widen the input. + if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) { + Src = GetWidenedVector(Src); + SrcVT = Src.getValueType(); + } + + // Input and output not widened to the same size, give up. 
+ if (WidenNumElts != SrcVT.getVectorElementCount()) + return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue()); + + return DAG.getNode(N->getOpcode(), dl, WidenVT, Src); +} + SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { SDValue InOp = N->getOperand(1); SDLoc DL(N); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index ff83e7c8c32ae9..5d34cd6592702e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -150,7 +150,22 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call llrintf@plt +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 2 ; RV32-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index cd4eec44920c95..7cb864546cebcb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -111,11 +111,14 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) { ; RV64-i32-NEXT: vfmv.f.s fa5, v10 ; RV64-i32-NEXT: fcvt.l.s a0, fa5 ; RV64-i32-NEXT: vslide1down.vx v9, v9, a0 -; RV64-i32-NEXT: vslidedown.vi v8, v8, 2 +; RV64-i32-NEXT: vslidedown.vi v10, v8, 2 +; RV64-i32-NEXT: vfmv.f.s fa5, v10 +; RV64-i32-NEXT: fcvt.l.s a0, fa5 +; RV64-i32-NEXT: vslide1down.vx v9, v9, a0 +; RV64-i32-NEXT: vslidedown.vi v8, v8, 3 ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: fcvt.l.s a0, fa5 ; RV64-i32-NEXT: vslide1down.vx v8, v9, a0 -; RV64-i32-NEXT: vslidedown.vi v8, v8, 1 ; RV64-i32-NEXT: ret ; ; RV64-i64-LABEL: lrint_v3f32: diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll index 7373cd32df98d4..f527a3584f4470 100644 --- a/llvm/test/CodeGen/X86/vector-lrint.ll +++ b/llvm/test/CodeGen/X86/vector-lrint.ll @@ -1,13 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefix=X86-AVX -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefix=X86-AVX -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE -; RUN: sed 's/iXLen/i64/g' %s | llc 
-mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX1-i32 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX512-i32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX1-i64 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX512-i64 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; X86-SSE2-LABEL: lrint_v1f32: @@ -19,6 +17,16 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v1f32: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %eax +; X64-AVX-i32-NEXT: retq +; +; X64-AVX-i64-LABEL: lrint_v1f32: +; X64-AVX-i64: # %bb.0: +; X64-AVX-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX-i64-NEXT: retq %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) ret <1 x iXLen> %a } @@ -60,6 +68,31 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { ; X86-AVX-NEXT: vcvtss2si %xmm0, %eax ; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 ; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v2f32: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; X64-AVX-i32-NEXT: vcvtss2si %xmm1, %eax +; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %ecx +; X64-AVX-i32-NEXT: vmovd %ecx, %xmm1 +; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX-i32-NEXT: vcvtss2si %xmm2, %eax +; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX-i32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %eax +; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; X64-AVX-i32-NEXT: retq +; +; X64-AVX-i64-LABEL: lrint_v2f32: +; X64-AVX-i64: # %bb.0: +; X64-AVX-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX-i64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X64-AVX-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64-AVX-i64-NEXT: retq %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) ret <2 x iXLen> %a } @@ -101,6 +134,57 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { ; X86-AVX-NEXT: vcvtss2si %xmm0, %eax ; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 ; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v4f32: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; X64-AVX-i32-NEXT: vcvtss2si %xmm1, %eax +; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %ecx +; X64-AVX-i32-NEXT: vmovd %ecx, %xmm1 +; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX-i32-NEXT: vcvtss2si %xmm2, %eax +; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX-i32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-AVX-i32-NEXT: vcvtss2si %xmm0, %eax +; 
X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; X64-AVX-i32-NEXT: retq +; +; X64-AVX1-i64-LABEL: lrint_v4f32: +; X64-AVX1-i64: # %bb.0: +; X64-AVX1-i64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm2, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX1-i64-NEXT: retq +; +; X64-AVX512-i64-LABEL: lrint_v4f32: +; X64-AVX512-i64: # %bb.0: +; X64-AVX512-i64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; X64-AVX512-i64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX512-i64-NEXT: retq %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) ret <4 x iXLen> %a } @@ -143,6 +227,186 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { ; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 ; X86-SSE2-NEXT: retl +; +; X86-AVX1-LABEL: lrint_v8f32: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X86-AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; X86-AVX1-NEXT: vcvtss2si %xmm2, %eax +; X86-AVX1-NEXT: vcvtss2si %xmm1, %ecx +; X86-AVX1-NEXT: vmovd %ecx, %xmm2 +; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0] +; X86-AVX1-NEXT: vcvtss2si %xmm3, %eax +; X86-AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X86-AVX1-NEXT: vcvtss2si %xmm1, %eax +; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; X86-AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; X86-AVX1-NEXT: vcvtss2si %xmm2, %eax +; X86-AVX1-NEXT: vcvtss2si %xmm0, %ecx +; X86-AVX1-NEXT: vmovd %ecx, %xmm2 +; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] +; X86-AVX1-NEXT: vcvtss2si %xmm3, %eax +; X86-AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X86-AVX1-NEXT: vcvtss2si %xmm0, %eax +; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X86-AVX1-NEXT: retl +; +; X86-AVX512-LABEL: lrint_v8f32: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X86-AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; X86-AVX512-NEXT: vcvtss2si %xmm2, %eax +; X86-AVX512-NEXT: vcvtss2si %xmm1, %ecx +; X86-AVX512-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX512-NEXT: vshufpd 
{{.*#+}} xmm3 = xmm1[1,0] +; X86-AVX512-NEXT: vcvtss2si %xmm3, %eax +; X86-AVX512-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X86-AVX512-NEXT: vcvtss2si %xmm1, %eax +; X86-AVX512-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; X86-AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; X86-AVX512-NEXT: vcvtss2si %xmm2, %eax +; X86-AVX512-NEXT: vcvtss2si %xmm0, %ecx +; X86-AVX512-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] +; X86-AVX512-NEXT: vcvtss2si %xmm3, %eax +; X86-AVX512-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X86-AVX512-NEXT: vcvtss2si %xmm0, %eax +; X86-AVX512-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X86-AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; X86-AVX512-NEXT: retl +; +; X64-AVX1-i32-LABEL: lrint_v8f32: +; X64-AVX1-i32: # %bb.0: +; X64-AVX1-i32-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X64-AVX1-i32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; X64-AVX1-i32-NEXT: vcvtss2si %xmm2, %eax +; X64-AVX1-i32-NEXT: vcvtss2si %xmm1, %ecx +; X64-AVX1-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX1-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0] +; X64-AVX1-i32-NEXT: vcvtss2si %xmm3, %eax +; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X64-AVX1-i32-NEXT: vcvtss2si %xmm1, %eax +; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; X64-AVX1-i32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; X64-AVX1-i32-NEXT: vcvtss2si %xmm2, %eax +; X64-AVX1-i32-NEXT: vcvtss2si %xmm0, %ecx +; X64-AVX1-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX1-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] +; X64-AVX1-i32-NEXT: vcvtss2si %xmm3, %eax +; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-AVX1-i32-NEXT: vcvtss2si %xmm0, %eax +; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X64-AVX1-i32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX1-i32-NEXT: retq +; +; X64-AVX512-i32-LABEL: lrint_v8f32: +; X64-AVX512-i32: # %bb.0: +; X64-AVX512-i32-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X64-AVX512-i32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] +; X64-AVX512-i32-NEXT: vcvtss2si %xmm2, %eax +; X64-AVX512-i32-NEXT: vcvtss2si %xmm1, %ecx +; X64-AVX512-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX512-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX512-i32-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0] +; X64-AVX512-i32-NEXT: vcvtss2si %xmm3, %eax +; X64-AVX512-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX512-i32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3] +; X64-AVX512-i32-NEXT: vcvtss2si %xmm1, %eax +; X64-AVX512-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; X64-AVX512-i32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; X64-AVX512-i32-NEXT: vcvtss2si %xmm2, %eax +; X64-AVX512-i32-NEXT: vcvtss2si %xmm0, %ecx +; X64-AVX512-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX512-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX512-i32-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] +; X64-AVX512-i32-NEXT: vcvtss2si %xmm3, %eax +; X64-AVX512-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX512-i32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-AVX512-i32-NEXT: vcvtss2si %xmm0, %eax +; X64-AVX512-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X64-AVX512-i32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX512-i32-NEXT: retq +; 
+; X64-AVX1-i64-LABEL: lrint_v8f32: +; X64-AVX1-i64: # %bb.0: +; X64-AVX1-i64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm2, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm3, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; X64-AVX1-i64-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX1-i64-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm3, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X64-AVX1-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 +; X64-AVX1-i64-NEXT: vmovaps %ymm2, %ymm0 +; X64-AVX1-i64-NEXT: retq +; +; X64-AVX512-i64-LABEL: lrint_v8f32: +; X64-AVX512-i64: # %bb.0: +; X64-AVX512-i64-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X64-AVX512-i64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm3, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0] +; X64-AVX512-i64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; X64-AVX512-i64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm3, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; X64-AVX512-i64-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] +; X64-AVX512-i64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX512-i64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X64-AVX512-i64-NEXT: retq %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) ret <8 x iXLen> %a } @@ -154,193 +418,167 @@ define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <1 x i64> @lrint_v1f64(<1 x double> %x) { 
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; X86-SSE2-LABEL: lrint_v1f64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp -; X86-SSE2-NEXT: andl $-8, %esp -; X86-SSE2-NEXT: subl $8, %esp -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movsd %xmm0, (%esp) -; X86-SSE2-NEXT: fldl (%esp) -; X86-SSE2-NEXT: fistpll (%esp) -; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movd %xmm0, %eax -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; X86-SSE2-NEXT: movd %xmm0, %edx -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: lrint_v1f64: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %ebp, -8 -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp -; X86-AVX-NEXT: andl $-8, %esp -; X86-AVX-NEXT: subl $8, %esp -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovsd %xmm0, (%esp) -; X86-AVX-NEXT: fldl (%esp) -; X86-AVX-NEXT: fistpll (%esp) -; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovd %xmm0, %eax -; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax ; X86-AVX-NEXT: retl ; -; X64-SSE-LABEL: lrint_v1f64: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: lrint_v1f64: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax -; X64-AVX-NEXT: retq - %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) - ret <1 x i64> %a +; X64-AVX-i32-LABEL: lrint_v1f64: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX-i32-NEXT: retq +; +; X64-AVX-i64-LABEL: lrint_v1f64: +; X64-AVX-i64: # %bb.0: +; X64-AVX-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-i64-NEXT: retq + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) -define <2 x i64> @lrint_v2f64(<2 x double> %x) { +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { ; X86-SSE2-LABEL: lrint_v2f64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp -; X86-SSE2-NEXT: andl $-8, %esp -; X86-SSE2-NEXT: subl $16, %esp -; X86-SSE2-NEXT: movhps %xmm0, (%esp) -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl (%esp) -; X86-SSE2-NEXT: fistpll (%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 ; X86-SSE2-NEXT: retl ; ; X86-AVX-LABEL: lrint_v2f64: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX-NEXT: .cfi_offset %ebp, -8 -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: .cfi_def_cfa_register %ebp -; X86-AVX-NEXT: andl $-8, %esp -; X86-AVX-NEXT: subl $16, %esp -; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; X86-AVX-NEXT: vmovhps %xmm0, (%esp) -; X86-AVX-NEXT: fldl {{[0-9]+}}(%esp) -; X86-AVX-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-AVX-NEXT: fldl (%esp) -; X86-AVX-NEXT: fistpll (%esp) -; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] +; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax +; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx +; X86-AVX-NEXT: vmovd %ecx, %xmm0 +; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; X86-AVX-NEXT: retl ; -; X64-SSE-LABEL: lrint_v2f64: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: movq %rax, %xmm1 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; X64-SSE-NEXT: movdqa %xmm1, %xmm0 -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: lrint_v2f64: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax -; X64-AVX-NEXT: vmovq %rax, %xmm1 -; X64-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] -; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax -; X64-AVX-NEXT: vmovq %rax, %xmm0 -; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64-AVX-NEXT: retq - %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) - ret <2 x i64> %a +; X64-AVX-i32-LABEL: lrint_v2f64: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] +; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax +; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx +; X64-AVX-i32-NEXT: vmovd %ecx, %xmm0 +; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; X64-AVX-i32-NEXT: retq +; +; X64-AVX-i64-LABEL: lrint_v2f64: +; X64-AVX-i64: # %bb.0: +; X64-AVX-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX-i64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64-AVX-i64-NEXT: retq + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) -define <4 x i64> @lrint_v4f64(<4 x double> %x) { +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { ; X86-SSE2-LABEL: lrint_v4f64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp -; X86-SSE2-NEXT: andl $-8, %esp -; X86-SSE2-NEXT: subl $32, %esp -; X86-SSE2-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movhps %xmm1, (%esp) -; X86-SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; 
X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl (%esp) -; X86-SSE2-NEXT: fistpll (%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 ; X86-SSE2-NEXT: retl ; -; X64-SSE-LABEL: lrint_v4f64: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: movq %rax, %xmm2 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; X64-SSE-NEXT: cvtsd2si %xmm1, %rax -; X64-SSE-NEXT: movq %rax, %xmm3 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm1, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; X64-SSE-NEXT: movdqa %xmm2, %xmm0 -; X64-SSE-NEXT: movdqa %xmm3, %xmm1 -; X64-SSE-NEXT: retq - %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) - ret <4 x i64> %a +; X86-AVX-LABEL: lrint_v4f64: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] +; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax +; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx +; X86-AVX-NEXT: vmovd %ecx, %xmm1 +; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X86-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX-NEXT: vcvtsd2si %xmm0, %eax +; X86-AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X86-AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X86-AVX-NEXT: vcvtsd2si %xmm0, %eax +; X86-AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-AVX-i32-LABEL: lrint_v4f64: +; X64-AVX-i32: # %bb.0: +; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] +; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax +; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx +; X64-AVX-i32-NEXT: vmovd %ecx, %xmm1 +; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X64-AVX-i32-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX-i32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX-i32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; X64-AVX-i32-NEXT: vzeroupper +; X64-AVX-i32-NEXT: retq +; +; X64-AVX1-i64-LABEL: lrint_v4f64: +; X64-AVX1-i64: # %bb.0: +; X64-AVX1-i64-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm1 = 
xmm1[1,0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX1-i64-NEXT: retq +; +; X64-AVX512-i64-LABEL: lrint_v4f64: +; X64-AVX512-i64: # %bb.0: +; X64-AVX512-i64-NEXT: vextractf128 $1, %ymm0, %xmm1 +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; X64-AVX512-i64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX512-i64-NEXT: retq + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) -define <8 x i64> @lrint_v8f64(<8 x double> %x) { +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; X86-SSE2-LABEL: lrint_v8f64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %ebp @@ -349,81 +587,216 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; X86-SSE2-NEXT: movl %esp, %ebp ; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp ; X86-SSE2-NEXT: andl $-16, %esp -; X86-SSE2-NEXT: subl $80, %esp -; X86-SSE2-NEXT: movaps 8(%ebp), %xmm3 -; X86-SSE2-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movhps %xmm1, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movlps %xmm1, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movhps %xmm2, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movlps %xmm2, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movhps %xmm3, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movlps %xmm3, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpll {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; 
X86-SSE2-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero -; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero -; X86-SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; X86-SSE2-NEXT: subl $16, %esp +; X86-SSE2-NEXT: movapd %xmm0, %xmm3 +; X86-SSE2-NEXT: movapd 8(%ebp), %xmm4 +; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm5 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1] +; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax +; X86-SSE2-NEXT: movd %eax, %xmm0 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax +; X86-SSE2-NEXT: movd %eax, %xmm3 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm1 +; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] +; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax +; X86-SSE2-NEXT: movd %eax, %xmm2 +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; X86-SSE2-NEXT: movl %ebp, %esp ; X86-SSE2-NEXT: popl %ebp ; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 ; X86-SSE2-NEXT: retl ; -; X64-SSE-LABEL: lrint_v8f64: -; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: movq %rax, %xmm4 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm0, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0] -; X64-SSE-NEXT: cvtsd2si %xmm1, %rax -; X64-SSE-NEXT: movq %rax, %xmm5 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm1, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0] -; X64-SSE-NEXT: cvtsd2si %xmm2, %rax -; X64-SSE-NEXT: movq %rax, %xmm6 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm2, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0] -; X64-SSE-NEXT: cvtsd2si %xmm3, %rax -; X64-SSE-NEXT: movq %rax, %xmm7 -; X64-SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1] -; X64-SSE-NEXT: cvtsd2si %xmm3, %rax -; X64-SSE-NEXT: movq %rax, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0] -; X64-SSE-NEXT: movdqa %xmm4, %xmm0 -; X64-SSE-NEXT: movdqa %xmm5, %xmm1 -; X64-SSE-NEXT: movdqa %xmm6, %xmm2 -; X64-SSE-NEXT: movdqa %xmm7, %xmm3 -; X64-SSE-NEXT: retq - %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) - ret <8 x i64> %a +; X86-AVX1-LABEL: lrint_v8f64: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] +; X86-AVX1-NEXT: vcvtsd2si %xmm2, %eax +; X86-AVX1-NEXT: vcvtsd2si %xmm1, %ecx +; X86-AVX1-NEXT: vmovd %ecx, %xmm2 +; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; X86-AVX1-NEXT: vcvtsd2si %xmm1, %eax +; X86-AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] +; X86-AVX1-NEXT: vcvtsd2si %xmm1, %eax +; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; X86-AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; 
X86-AVX1-NEXT: vcvtsd2si %xmm2, %eax +; X86-AVX1-NEXT: vcvtsd2si %xmm0, %ecx +; X86-AVX1-NEXT: vmovd %ecx, %xmm2 +; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX1-NEXT: vcvtsd2si %xmm0, %eax +; X86-AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X86-AVX1-NEXT: vcvtsd2si %xmm0, %eax +; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X86-AVX1-NEXT: retl +; +; X86-AVX512-LABEL: lrint_v8f64: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm1 +; X86-AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] +; X86-AVX512-NEXT: vcvtsd2si %xmm2, %eax +; X86-AVX512-NEXT: vcvtsd2si %xmm1, %ecx +; X86-AVX512-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X86-AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm2 +; X86-AVX512-NEXT: vcvtsd2si %xmm2, %eax +; X86-AVX512-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X86-AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] +; X86-AVX512-NEXT: vcvtsd2si %xmm2, %eax +; X86-AVX512-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X86-AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X86-AVX512-NEXT: vcvtsd2si %xmm2, %eax +; X86-AVX512-NEXT: vcvtsd2si %xmm0, %ecx +; X86-AVX512-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X86-AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX512-NEXT: vcvtsd2si %xmm0, %eax +; X86-AVX512-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X86-AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X86-AVX512-NEXT: vcvtsd2si %xmm0, %eax +; X86-AVX512-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X86-AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; X86-AVX512-NEXT: retl +; +; X64-AVX1-i32-LABEL: lrint_v8f64: +; X64-AVX1-i32: # %bb.0: +; X64-AVX1-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm2, %eax +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm1, %ecx +; X64-AVX1-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX1-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vextractf128 $1, %ymm1, %xmm1 +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm1, %eax +; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm1, %eax +; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 +; X64-AVX1-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm2, %eax +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm0, %ecx +; X64-AVX1-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX1-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX1-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX1-i32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX1-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX1-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X64-AVX1-i32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX1-i32-NEXT: retq +; +; X64-AVX512-i32-LABEL: lrint_v8f64: +; X64-AVX512-i32: # %bb.0: +; X64-AVX512-i32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 +; X64-AVX512-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm2, %eax +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm1, %ecx +; X64-AVX512-i32-NEXT: vmovd %ecx, %xmm1 +; X64-AVX512-i32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1 +; X64-AVX512-i32-NEXT: vextractf32x4 $3, %zmm0, %xmm2 +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm2, %eax +; X64-AVX512-i32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 +; X64-AVX512-i32-NEXT: vshufpd {{.*#+}} xmm2 = 
xmm2[1,0] +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm2, %eax +; X64-AVX512-i32-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 +; X64-AVX512-i32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm2, %eax +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm0, %ecx +; X64-AVX512-i32-NEXT: vmovd %ecx, %xmm2 +; X64-AVX512-i32-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 +; X64-AVX512-i32-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX512-i32-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 +; X64-AVX512-i32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX512-i32-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX512-i32-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 +; X64-AVX512-i32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; X64-AVX512-i32-NEXT: retq +; +; X64-AVX1-i64-LABEL: lrint_v8f64: +; X64-AVX1-i64: # %bb.0: +; X64-AVX1-i64-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX1-i64-NEXT: vextractf128 $1, %ymm1, %xmm2 +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX1-i64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] +; X64-AVX1-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX1-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX1-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0] +; X64-AVX1-i64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; X64-AVX1-i64-NEXT: retq +; +; X64-AVX512-i64-LABEL: lrint_v8f64: +; X64-AVX512-i64: # %bb.0: +; X64-AVX512-i64-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm1, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm1 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X64-AVX512-i64-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; X64-AVX512-i64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X64-AVX512-i64-NEXT: vextractf128 $1, %ymm0, %xmm2 +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm2, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm2 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm3 +; X64-AVX512-i64-NEXT: vshufpd 
{{.*#+}} xmm0 = xmm0[1,0] +; X64-AVX512-i64-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX512-i64-NEXT: vmovq %rax, %xmm0 +; X64-AVX512-i64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] +; X64-AVX512-i64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX512-i64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X64-AVX512-i64-NEXT: retq + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
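
For reference, a minimal LLVM IR input that exercises the new WidenVecRes_XRINT path looks like the sketch below. It is modeled on the v3 cases already covered by fixed-vectors-llrint.ll/fixed-vectors-lrint.ll above and is illustrative only; the function name @lrint_widen_sketch is not part of the patch. Both the <3 x i64> result and the <3 x float> source widen to four elements, so the node is simply rebuilt on the widened types; if the source could not be widened to the same element count as the result, the new code falls back to DAG.UnrollVectorOp instead.

define <3 x i64> @lrint_widen_sketch(<3 x float> %x) {
  ; The <3 x i64> result is an illegal vector type; with this patch it is
  ; widened by WidenVecRes_XRINT instead of the generic unary FP path that
  ; previously handled ISD::LRINT/ISD::LLRINT.
  %a = call <3 x i64> @llvm.lrint.v3i64.v3f32(<3 x float> %x)
  ret <3 x i64> %a
}
declare <3 x i64> @llvm.lrint.v3i64.v3f32(<3 x float>)

Feeding this to llc with one of the configurations used above (for example a RISC-V vector target or an x86-64 AVX run, as in the updated RUN lines) exercises the same widening that the regenerated CHECK lines document.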