From 24267a7e14b35f41ab55e15ba12bb80c82881941 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 5 Sep 2024 22:57:27 -0700 Subject: [PATCH] AMDGPU: Add f64 to f32 support for llvm.fptrunc.round (#107481) --- llvm/lib/Target/AMDGPU/SIInstructions.td | 6 +++ llvm/lib/Target/AMDGPU/SIModeRegister.cpp | 12 ++++-- .../CodeGen/AMDGPU/llvm.fptrunc.round.err.ll | 28 ++++++------- .../test/CodeGen/AMDGPU/llvm.fptrunc.round.ll | 39 +++++++++++++++++++ 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 69e1b9a38324f2..c0154645b391df 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI< let Uses = [MODE, EXEC] in { def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst), (ins VGPR_32:$src0, i32imm:$round)>; + +def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst), + (ins VReg_64:$src0, i32imm:$round)>; } // End Uses = [MODE, EXEC] def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))), (FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>; +def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))), + (FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>; + // Invert the exec mask and overwrite the inactive lanes of dst with inactive, // restoring it after we're done. let Defs = [SCC], isConvergent = 1 in { diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp index a590c6560942cf..6bcf9757d29457 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII) { unsigned Opcode = MI.getOpcode(); if (TII->usesFPDPRounding(MI) || - Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) { + Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO || + Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) { switch (Opcode) { case AMDGPU::V_INTERP_P1LL_F16: case AMDGPU::V_INTERP_P1LV_F16: @@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI, B.addImm(0); // omod } else MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); - return Status(FP_ROUND_MODE_DP(3), - FP_ROUND_MODE_DP(Mode)); + return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode)); + } + case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: { + unsigned Mode = MI.getOperand(2).getImm(); + MI.removeOperand(2); + MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32)); + return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode)); } default: return DefaultStatus; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll index 291fe00a6177bd..21fe1ce4dc1d6f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll @@ -3,15 +3,15 @@ ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s - ; TODO: check for GISEL when bfloat is supported. ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s + +; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s ;--- f16-f64-err.ll define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) { @@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o ret void } -;--- f32-f64-err.ll -define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) { -; F32-F64-FAIL: LLVM ERROR: Cannot select - %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward") - store float %res, ptr addrspace(1) %out, align 4 - ret void -} - ;--- bf16-f32-err.ll define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) { ; BF16-F32-FAIL: LLVM ERROR: Cannot select @@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) % ;--- f16-f32-tonearestaway-err.ll define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) { -; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select +; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway") store half %res, ptr addrspace(1) %out, align 2 ret void } + +;--- f32-f64-tonearestaway-err.ll +define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) { +; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select + %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway") + store float %res, ptr addrspace(1) %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll index 54ed6f1eb42820..3d9ce6e79d9d28 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll @@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float> %res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward") ret <8 x half> %res } + +define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) { +; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] +; CHECK-NEXT: ; return to shader part epilog + %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest") + ret float %res +} + +define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) { +; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1 +; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] +; CHECK-NEXT: ; return to shader part epilog + %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward") + ret float %res +} + +define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) { +; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1 +; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] +; CHECK-NEXT: ; return to shader part epilog + %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward") + ret float %res +} + +define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) { +; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3 +; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] +; CHECK-NEXT: ; return to shader part epilog + %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero") + ret float %res +}