Skip to content

Commit

Permalink
AMDGPU: Add f64 to f32 support for llvm.fptrunc.round (llvm#107481)
Browse files Browse the repository at this point in the history
  • Loading branch information
changpeng committed Sep 6, 2024
1 parent dcfa147 commit 24267a7
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 17 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// Pseudo instructions for fptrunc_round. Each one carries the source value
// plus the requested rounding mode as an immediate ($round), and is declared
// as using MODE and EXEC.
let Uses = [MODE, EXEC] in {
// f32 -> f16 variant: VGPR_32 source, VGPR_32 result.
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;

// f64 -> f32 variant: VReg_64 source, VGPR_32 result.
def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]

// Select an f32 -> f16 fptrunc_round whose rounding-mode operand satisfies
// SupportedRoundMode to the f16/f32 pseudo; the rounding mode is passed
// through as_hw_round_mode before becoming the pseudo's $round immediate.
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;

// Select an f64 -> f32 fptrunc_round whose rounding-mode operand satisfies
// SupportedRoundMode to the f32/f64 pseudo; the rounding mode is passed
// through as_hw_round_mode before becoming the pseudo's $round immediate.
def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;

// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/Target/AMDGPU/SIModeRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
const SIInstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
Expand All @@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
B.addImm(0); // omod
} else
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
return Status(FP_ROUND_MODE_DP(3),
FP_ROUND_MODE_DP(Mode));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
unsigned Mode = MI.getOperand(2).getImm();
MI.removeOperand(2);
MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
default:
return DefaultStatus;
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s

; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s

; TODO: check for GISEL when bfloat is supported.
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s

; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s

; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s

;--- f16-f64-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
Expand All @@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o
ret void
}

;--- f32-f64-err.ll
define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) {
; F32-F64-FAIL: LLVM ERROR: Cannot select
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
store float %res, ptr addrspace(1) %out, align 4
ret void
}

;--- bf16-f32-err.ll
define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) {
; BF16-F32-FAIL: LLVM ERROR: Cannot select
Expand All @@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) %

;--- f16-f32-tonearestaway-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) {
; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway")
store half %res, ptr addrspace(1) %out, align 2
ret void
}

;--- f32-f64-tonearestaway-err.ll
; Negative test: f64 -> f32 truncation with the round.tonearestaway mode is
; expected to fail instruction selection.
define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) {
; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway")
store float %res, ptr addrspace(1) %out, align 4
ret void
}
39 changes: 39 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
Original file line number Diff line number Diff line change
Expand Up @@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
%res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
ret <8 x half> %res
}

; round.tonearest: the expected output is just the conversion instruction,
; with no s_setreg_imm32_b32 in front of it.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
ret float %res
}

; round.upward: the expected output writes HW_REG_MODE with a single
; s_setreg_imm32_b32 immediately before the conversion instruction.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
ret float %res
}

; round.downward: the expected output writes HW_REG_MODE with a single
; s_setreg_imm32_b32 immediately before the conversion instruction.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
ret float %res
}

; round.towardzero: the expected output writes HW_REG_MODE with a single
; s_setreg_imm32_b32 immediately before the conversion instruction.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
ret float %res
}

0 comments on commit 24267a7

Please sign in to comment.