From 05f987743170cbd9fc97699c7a9b352055de7300 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 12 Jul 2024 13:47:10 +0100
Subject: [PATCH] [X86] Add handling for
 shift_logical(select(icmp_uge(amt,BW),0,x),amt) -> avx2 shift(x,amt)

We need to catch this otherwise pre-AVX512 targets will fold this to
shift_logical(and(icmp_ult(amt,BW),x),amt)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++
 llvm/test/CodeGen/X86/combine-shl.ll    | 19 +++++--------------
 llvm/test/CodeGen/X86/combine-srl.ll    | 19 +++++--------------
 3 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0a11e2134c7088..a731541ca7778e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48042,6 +48042,14 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
         SV == VT.getScalarSizeInBits()) {
       return DAG.getNode(X86ISD::VSHLV, DL, VT, N00, N1);
     }
+    // fold shl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psllv(x,amt)
+    if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
+        cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
+        ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
+        ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
+        SV == VT.getScalarSizeInBits()) {
+      return DAG.getNode(X86ISD::VSHLV, DL, VT, N01, N1);
+    }
   }
 
   // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
@@ -48176,6 +48184,14 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
         SV == VT.getScalarSizeInBits()) {
       return DAG.getNode(X86ISD::VSRLV, DL, VT, N00, N1);
     }
+    // fold srl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psrlv(x,amt)
+    if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
+        cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
+        ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
+        ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
+        SV == VT.getScalarSizeInBits()) {
+      return DAG.getNode(X86ISD::VSRLV, DL, VT, N01, N1);
+    }
   }
 
   // Only do this on the last DAG combine as it can interfere with other
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index c5ce1e0046ad09..8d8c1d26fc5cac 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -1044,19 +1044,10 @@ define <4 x i32> @combine_vec_shl_commuted_clamped(<4 x i32> %sh, <4 x i32> %amt
 ; SSE41-NEXT:    pmulld %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX2-LABEL: combine_vec_shl_commuted_clamped:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT:    vpminud %xmm2, %xmm1, %xmm2
-; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT:    vpand %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: combine_vec_shl_commuted_clamped:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: combine_vec_shl_commuted_clamped:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
   %1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
   %shl = shl <4 x i32> %1, %amt
@@ -1112,4 +1103,4 @@ define <4 x i32> @combine_vec_shl_commuted_clamped1(<4 x i32> %sh, <4 x i32> %am
   %shl = shl <4 x i32> %sh, %amt
   %1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shl
   ret <4 x i32> %1
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 78dcf6e7434007..f2a9aa217f7ec6 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -771,19 +771,10 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped(<4 x i32> %sh, <4 x i32> %am
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
 ; SSE41-NEXT:    retq
 ;
-; AVX2-LABEL: combine_vec_lshr_commuted_clamped:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
-; AVX2-NEXT:    vpminud %xmm2, %xmm1, %xmm2
-; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX2-NEXT:    vpand %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: combine_vec_lshr_commuted_clamped:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: combine_vec_lshr_commuted_clamped:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
   %1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
   %shr = lshr <4 x i32> %1, %amt
@@ -854,4 +845,4 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped1(<4 x i32> %sh, <4 x i32> %a
   %shr = lshr <4 x i32> %sh, %amt
   %1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shr
   ret <4 x i32> %1
-}
\ No newline at end of file
+}