Skip to content

Commit

Permalink
[AArch64] Peephole optimization to remove redundant csel instructions (llvm#101483)
Browse files Browse the repository at this point in the history

Given a sequence such as

  %8:gpr64 = COPY $xzr
  %10:gpr64 = COPY $xzr
  %11:gpr64 = CSELXr %8:gpr64, %10:gpr64, 0, implicit $nzcv

`PeepholeOptimizer::foldRedundantCopy` led to the creation of select
instructions where both inputs were the same register:

  %11:gpr64 = CSELXr %8:gpr64, %8:gpr64, 0, implicit $nzcv

This change adds a later peephole optimization that replaces such
selects with unconditional moves.
  • Loading branch information
citymarina authored Aug 5, 2024
1 parent 69fe7da commit 29763aa
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
28 changes: 28 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
// %6:fpr128 = IMPLICIT_DEF
// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
// 8. Remove redundant CSELs that select between identical registers, by
// replacing them with unconditional moves.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
Expand Down Expand Up @@ -124,6 +127,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
template <typename T>
bool visitAND(unsigned Opc, MachineInstr &MI);
bool visitORR(MachineInstr &MI);
bool visitCSEL(MachineInstr &MI);
bool visitINSERT(MachineInstr &MI);
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
bool visitINSvi64lane(MachineInstr &MI);
Expand Down Expand Up @@ -283,6 +287,26 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
return true;
}

bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  // A CSEL whose two source operands name the same register produces that
  // register whichever way the condition goes, so the select can be replaced
  // by an unconditional move. The move is emitted as
  //   ORR <dst>, <zero register>, <src>, 0
  // using the X or W form to match the width of the CSEL being replaced.
  //
  // Returns true if MI was replaced (and erased), false if it was left alone.
  const auto SrcReg = MI.getOperand(1).getReg();
  if (SrcReg != MI.getOperand(2).getReg())
    return false;

  // Only CSELXr is treated as 64-bit; any other opcode reaching here takes
  // the 32-bit (W) form.
  const bool Is64Bit = MI.getOpcode() == AArch64::CSELXr;
  const auto MovOpcode = Is64Bit ? AArch64::ORRXrs : AArch64::ORRWrs;
  const auto ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  const auto DstReg = MI.getOperand(0).getReg();

  // Insert the replacement immediately before MI, reusing its debug location.
  auto &MBB = *MI.getParent();
  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(MovOpcode))
      .addReg(DstReg, RegState::Define)
      .addReg(ZeroReg)
      .addReg(SrcReg)
      .addImm(0);

  // The original select is now dead; MI must not be touched after this.
  MI.eraseFromParent();
  return true;
}

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
// Check this INSERT_SUBREG comes from below zero-extend pattern.
//
Expand Down Expand Up @@ -788,6 +812,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
{AArch64::ADDXri, AArch64::ADDSXri}, MI);
break;
case AArch64::CSELWr:
case AArch64::CSELXr:
Changed |= visitCSEL(MI);
break;
case AArch64::INSvi64gpr:
Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
break;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/peephole-csel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w2, #0x1
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: csel x9, xzr, xzr, eq
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: tst w1, #0x1
; CHECK-NEXT: csel x8, x8, x9, eq
; CHECK-NEXT: str x8, [x0]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/peephole-csel.mir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[COPY1]], 0, implicit $nzcv
; CHECK-NEXT: [[ORRXrs:%[0-9]+]]:gpr64 = ORRXrs $xzr, [[COPY1]], 0
; CHECK-NEXT: RET_ReallyLR
%3:gpr64 = COPY $x1
%4:gpr64 = COPY $x0
Expand All @@ -46,7 +46,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY1]], [[COPY1]], 0, implicit $nzcv
; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
; CHECK-NEXT: RET_ReallyLR
%3:gpr32 = COPY $w1
%4:gpr32 = COPY $w0
Expand Down

0 comments on commit 29763aa

Please sign in to comment.