Skip to content

Commit

Permalink
[X86] Add handling for shift_logical(select(icmp_uge(amt,BW),0,x),amt) -> avx2 shift(x,amt)
Browse files Browse the repository at this point in the history

We need to catch this pattern; otherwise, pre-AVX512 targets will fold it to shift_logical(and(icmp_ult(amt,BW),x),amt).
  • Loading branch information
RKSimon committed Jul 12, 2024
1 parent 7918e62 commit 05f9877
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 28 deletions.
16 changes: 16 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48042,6 +48042,14 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
SV == VT.getScalarSizeInBits()) {
return DAG.getNode(X86ISD::VSHLV, DL, VT, N00, N1);
}
// fold shl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psllv(x,amt)
if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
SV == VT.getScalarSizeInBits()) {
return DAG.getNode(X86ISD::VSHLV, DL, VT, N01, N1);
}
}

// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
Expand Down Expand Up @@ -48176,6 +48184,14 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
SV == VT.getScalarSizeInBits()) {
return DAG.getNode(X86ISD::VSRLV, DL, VT, N00, N1);
}
// fold srl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psrlv(x,amt)
if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
SV == VT.getScalarSizeInBits()) {
return DAG.getNode(X86ISD::VSRLV, DL, VT, N01, N1);
}
}

// Only do this on the last DAG combine as it can interfere with other
Expand Down
19 changes: 5 additions & 14 deletions llvm/test/CodeGen/X86/combine-shl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1044,19 +1044,10 @@ define <4 x i32> @combine_vec_shl_commuted_clamped(<4 x i32> %sh, <4 x i32> %amt
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX2-LABEL: combine_vec_shl_commuted_clamped:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpminud %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: combine_vec_shl_commuted_clamped:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
; AVX-LABEL: combine_vec_shl_commuted_clamped:
; AVX: # %bb.0:
; AVX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
%shl = shl <4 x i32> %1, %amt
Expand Down Expand Up @@ -1112,4 +1103,4 @@ define <4 x i32> @combine_vec_shl_commuted_clamped1(<4 x i32> %sh, <4 x i32> %am
%shl = shl <4 x i32> %sh, %amt
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shl
ret <4 x i32> %1
}
}
19 changes: 5 additions & 14 deletions llvm/test/CodeGen/X86/combine-srl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -771,19 +771,10 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped(<4 x i32> %sh, <4 x i32> %am
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
; SSE41-NEXT: retq
;
; AVX2-LABEL: combine_vec_lshr_commuted_clamped:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT: vpminud %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: combine_vec_lshr_commuted_clamped:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
; AVX-LABEL: combine_vec_lshr_commuted_clamped:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
%shr = lshr <4 x i32> %1, %amt
Expand Down Expand Up @@ -854,4 +845,4 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped1(<4 x i32> %sh, <4 x i32> %a
%shr = lshr <4 x i32> %sh, %amt
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shr
ret <4 x i32> %1
}
}

0 comments on commit 05f9877

Please sign in to comment.