diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0dbfc0d64376e7..5b1e79b9fad3e3 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -61,6 +61,9 @@
 //     %6:fpr128 = IMPLICIT_DEF
 //     %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
 //
+// 8. Remove redundant CSELs that select between identical registers, by
+//    replacing them with unconditional moves.
+//
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ExpandImm.h"
@@ -124,6 +127,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   template <typename T>
   bool visitAND(unsigned Opc, MachineInstr &MI);
   bool visitORR(MachineInstr &MI);
+  bool visitCSEL(MachineInstr &MI);
   bool visitINSERT(MachineInstr &MI);
   bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
   bool visitINSvi64lane(MachineInstr &MI);
@@ -283,6 +287,37 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
   return true;
 }
 
+bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
+  // A CSEL whose two register inputs are identical yields that value whichever
+  // way the condition goes, so replace it with an unconditional move, encoded
+  // as an ORR of the zero register with the source. The replacement no longer
+  // reads NZCV. Returns true if MI was replaced (and erased), false otherwise.
+  const MachineOperand &TrueOp = MI.getOperand(1);
+  const MachineOperand &FalseOp = MI.getOperand(2);
+
+  // The inputs are only the same value if they name the same register and the
+  // same sub-register index; comparing the register alone is not enough.
+  if (TrueOp.getReg() != FalseOp.getReg() ||
+      TrueOp.getSubReg() != FalseOp.getSubReg())
+    return false;
+
+  const bool Is64Bit = MI.getOpcode() == AArch64::CSELXr;
+  const unsigned ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+  const unsigned OrOpcode = Is64Bit ? AArch64::ORRXrs : AArch64::ORRWrs;
+
+  // Build the move in front of MI. The source keeps its sub-register index so
+  // the new instruction reads exactly what the CSEL read; the trailing
+  // immediate 0 is the shift operand of the shifted-register ORR.
+  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
+      .addReg(MI.getOperand(0).getReg(), RegState::Define)
+      .addReg(ZeroReg)
+      .addReg(TrueOp.getReg(), /*flags=*/0, TrueOp.getSubReg())
+      .addImm(0);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
   // Check this INSERT_SUBREG comes from below zero-extend pattern.
   //
@@ -788,6 +823,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
             visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                     {AArch64::ADDXri, AArch64::ADDSXri}, MI);
         break;
+      case AArch64::CSELWr:
+      case AArch64::CSELXr:
+        Changed |= visitCSEL(MI);
+        break;
       case AArch64::INSvi64gpr:
         Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
         break;
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 3f92943b11eb1d..868b9f1f2f6ac9 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -6,7 +6,7 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w2, #0x1
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    csel x9, xzr, xzr, eq
+; CHECK-NEXT:    mov x9, xzr
 ; CHECK-NEXT:    tst w1, #0x1
 ; CHECK-NEXT:    csel x8, x8, x9, eq
 ; CHECK-NEXT:    str x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.mir b/llvm/test/CodeGen/AArch64/peephole-csel.mir
index 5077441a33788a..d424dc05c801c8 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.mir
@@ -19,7 +19,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
-    ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK-NEXT: [[ORRXrs:%[0-9]+]]:gpr64 = ORRXrs $xzr, [[COPY1]], 0
     ; CHECK-NEXT: RET_ReallyLR
     %3:gpr64 = COPY $x1
     %4:gpr64 = COPY $x0
@@ -46,7 +46,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
     ; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
-    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY1]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
     ; CHECK-NEXT: RET_ReallyLR
     %3:gpr32 = COPY $w1
     %4:gpr32 = COPY $w0