Skip to content

Commit

Permalink
[X86]Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID/CET i…
Browse files Browse the repository at this point in the history
…nstructions (llvm#76786)

R16-R31 were added to the GPRs in
llvm#70958.
This patch supports lowering for the promoted
SHA/MOVDIR/CRC32/INVPCID/CET instructions.

RFC:
https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
  • Loading branch information
XinWang10 authored Jan 5, 2024
1 parent 59af659 commit f5f66e2
Show file tree
Hide file tree
Showing 12 changed files with 478 additions and 175 deletions.
67 changes: 29 additions & 38 deletions llvm/lib/Target/X86/X86DomainReassignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,40 +619,30 @@ void X86DomainReassignment::initConverters() {
std::make_unique<InstrReplacerDstCOPY>(From, To);
};

bool HasEGPR = STI->hasEGPR();
createReplacerDstCOPY(X86::MOVZX32rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacerDstCOPY(X86::MOVZX64rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
#define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC
createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));

createReplacerDstCOPY(X86::MOVZX32rr16,
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX64rr16,
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));

if (STI->hasDQI()) {
createReplacerDstCOPY(X86::MOVZX16rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX32rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX64rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);

createReplacerDstCOPY(X86::MOVZX16rr8,
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX32rr8,
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX64rr8,
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));

createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
}

auto createReplacer = [&](unsigned From, unsigned To) {
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};

createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk));
createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
createReplacer(X86::NOT16r, X86::KNOTWrr);
Expand All @@ -661,14 +651,14 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR16rr, X86::KXORWrr);

if (STI->hasBWI()) {
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm));
createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm));

createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk));
createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk));

createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk));
createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk));

createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
Expand Down Expand Up @@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() {

// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
//createReplacer(X86::TEST32rr, X86::KTESTDrr);
//createReplacer(X86::TEST64rr, X86::KTESTQrr);
// createReplacer(X86::TEST32rr, X86::KTESTDrr);
// createReplacer(X86::TEST64rr, X86::KTESTQrr);
}

if (STI->hasDQI()) {
Expand All @@ -706,9 +696,9 @@ void X86DomainReassignment::initConverters() {

createReplacer(X86::AND8rr, X86::KANDBrr);

createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk));
createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk));

createReplacer(X86::NOT8r, X86::KNOTBrr);

Expand All @@ -719,11 +709,12 @@ void X86DomainReassignment::initConverters() {

// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
//createReplacer(X86::TEST8rr, X86::KTESTBrr);
//createReplacer(X86::TEST16rr, X86::KTESTWrr);
// createReplacer(X86::TEST8rr, X86::KTESTBrr);
// createReplacer(X86::TEST16rr, X86::KTESTWrr);

createReplacer(X86::XOR8rr, X86::KXORBrr);
}
#undef GET_EGPR_IF_ENABLED
}

bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/X86/X86FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Unexpected intrinsic.");
#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
case Intrinsic::x86_sse42_crc32_32_8:
Opc = X86::CRC32r32r8;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_16:
Opc = X86::CRC32r32r16;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_32:
Opc = X86::CRC32r32r32;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_64_64:
Opc = X86::CRC32r64r64;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
RC = &X86::GR64RegClass;
break;
#undef GET_EGPR_IF_ENABLED
}

const Value *LHS = II->getArgOperand(0);
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/X86/X86InstrSystem.td
Original file line number Diff line number Diff line change
Expand Up @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[In64BitMode, HasINVPCID]>;
Requires<[In64BitMode]>;

def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>,
EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
} // SchedRW

let Predicates = [In64BitMode, HasINVPCID] in {
let Predicates = [HasINVPCID, NoEGPR] in {
// The instruction can only use a 64 bit register as the register argument
// in 64 bit mode, while the intrinsic only accepts a 32 bit argument
// corresponding to it.
Expand All @@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
addr:$src2)>;
}

let Predicates = [HasINVPCID, HasEGPR] in {
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
(INVPCID64_EVEX
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
addr:$src2)>;
}


//===----------------------------------------------------------------------===//
// SMAP Instruction
Expand Down
41 changes: 30 additions & 11 deletions llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

Expand All @@ -21,9 +22,15 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_mm_crc32_u8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32b %sil, %eax
; X64-NEXT: retq
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; X64-NEXT: crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; X64-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc32_u8:
; EGPR: # %bb.0:
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%trunc = trunc i32 %a1 to i8
%res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
ret i32 %res
Expand All @@ -41,9 +48,15 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_mm_crc32_u16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32w %si, %eax
; X64-NEXT: retq
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; X64-NEXT: crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc32_u16:
; EGPR: # %bb.0:
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%trunc = trunc i32 %a1 to i16
%res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
ret i32 %res
Expand All @@ -59,9 +72,15 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_mm_crc32_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32l %esi, %eax
; X64-NEXT: retq
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; X64-NEXT: crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc32_u32:
; EGPR: # %bb.0:
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
ret i32 %res
}
Expand Down
29 changes: 21 additions & 8 deletions llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
; CHECK-LABEL: test_mm_crc64_u8:
; CHECK: # %bb.0:
; CHECK-NEXT: crc32b %sil, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: crc32b %sil, %edi # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc64_u8:
; EGPR: # %bb.0:
; EGPR-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: retq # encoding: [0xc3]
%trunc = trunc i32 %a1 to i8
%res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
ret i64 %res
Expand All @@ -20,9 +27,15 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
; CHECK-LABEL: test_mm_crc64_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: crc32q %rsi, %rax
; CHECK-NEXT: retq
; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc64_u64:
; EGPR: # %bb.0:
; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
ret i64 %res
}
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR

define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; X86-LABEL: crc32_32_8:
Expand All @@ -14,6 +15,12 @@ define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_32_8:
; EGPR: ## %bb.0:
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
Expand All @@ -31,6 +38,12 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_32_16:
; EGPR: ## %bb.0:
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; EGPR-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
ret i32 %tmp
}
Expand All @@ -48,6 +61,12 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_32_32:
; EGPR: ## %bb.0:
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; EGPR-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
ret i32 %tmp
}
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR

declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
Expand All @@ -10,6 +11,12 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_64_8:
; EGPR: ## %bb.0:
; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
ret i64 %tmp
}
Expand All @@ -20,6 +27,12 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_64_64:
; EGPR: ## %bb.0:
; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
ret i64 %tmp
}
Loading

0 comments on commit f5f66e2

Please sign in to comment.