From b90cfe46015d9d12317146251e633d2ca07dccd7 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 2 Nov 2023 10:35:15 +0000 Subject: [PATCH] [AMDGPU] New ttracedata intrinsics (#70235) Add llvm.amdgcn.s.ttracedata and llvm.amdgcn.s.ttracedata.imm which map directly to the corresponding instructions s_ttracedata and s_ttracedata_imm. These are inherently whole-wave operations so any non-uniform inputs are readfirstlaned. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 7 +++ .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 10 ++++ llvm/lib/Target/AMDGPU/SOPInstructions.td | 9 +++- .../AMDGPU/llvm.amdgcn.s.ttracedata.ll | 53 +++++++++++++++++++ 4 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 0daa5a71340d6e..33fa3985e64d8b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1697,6 +1697,13 @@ def int_amdgcn_s_setprio : DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; +def int_amdgcn_s_ttracedata : + DefaultAttrsIntrinsic<[], [llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects]>; +def int_amdgcn_s_ttracedata_imm : + DefaultAttrsIntrinsic<[], [llvm_i16_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>; + // This is IntrHasSideEffects so it can be used to read cycle counters. 
def int_amdgcn_s_getreg : ClangBuiltin<"__builtin_amdgcn_s_getreg">, diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 259af55885fc0e..047108fd06db5f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3066,6 +3066,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(B, MI, 2); return; } + case Intrinsic::amdgcn_s_ttracedata: + constrainOpWithReadfirstlane(B, MI, 1); // M0 + return; case Intrinsic::amdgcn_raw_buffer_load_lds: case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: { applyDefaultMapping(OpdMapper); @@ -4670,6 +4673,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32); break; } + case Intrinsic::amdgcn_s_ttracedata: { + // This must be an SGPR, but accept a VGPR. + unsigned Bank = + getRegBankID(MI.getOperand(1).getReg(), MRI, AMDGPU::SGPRRegBankID); + OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32); + break; + } case Intrinsic::amdgcn_end_cf: { unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index edbfd79db3fdb9..a28921a7ff33f6 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1503,7 +1503,10 @@ def S_INCPERFLEVEL : SOPP_Pseudo <"s_incperflevel", (ins i32imm:$simm16), "$simm def S_DECPERFLEVEL : SOPP_Pseudo <"s_decperflevel", (ins i32imm:$simm16), "$simm16", [(int_amdgcn_s_decperflevel timm:$simm16)]> { } -def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins)> { + +let Uses = [M0] in +def S_TTRACEDATA : SOPP_Pseudo <"s_ttracedata", (ins), "", + [(int_amdgcn_s_ttracedata M0)]> { let simm16 = 0; let fixed_imm = 1; } @@ -1547,8 +1550,10 @@ let SubtargetPredicate = 
isGFX10Plus in { [(SIdenorm_mode (i32 timm:$simm16))]>; } + let hasSideEffects = 1 in def S_TTRACEDATA_IMM : - SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16">; + SOPP_Pseudo<"s_ttracedata_imm", (ins s16imm:$simm16), "$simm16", + [(int_amdgcn_s_ttracedata_imm timm:$simm16)]>; } // End SubtargetPredicate = isGFX10Plus let SubtargetPredicate = isGFX11Plus in { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll new file mode 100644 index 00000000000000..37b5357950e648 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s + +declare void @llvm.amdgcn.s.ttracedata(i32) +declare void @llvm.amdgcn.s.ttracedata.imm(i16) + +define amdgpu_cs void @ttracedata_c() { +; GFX11-LABEL: ttracedata_c: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_mov_b32 m0, 0xf4240 +; GFX11-NEXT: s_ttracedata +; GFX11-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata(i32 1000000) + ret void +} + +define amdgpu_cs void @ttracedata_s(i32 inreg %val) { +; GFX11-LABEL: ttracedata_s: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_mov_b32 m0, s0 +; GFX11-NEXT: s_ttracedata +; GFX11-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata(i32 %val) + ret void +} + +define amdgpu_cs void @ttracedata_v(i32 %val) { +; GFX11-SDAG-LABEL: ttracedata_v: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: s_mov_b32 m0, s0 +; GFX11-SDAG-NEXT: s_ttracedata +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: ttracedata_v: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: 
v_readfirstlane_b32 m0, v0 +; GFX11-GISEL-NEXT: s_ttracedata +; GFX11-GISEL-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata(i32 %val) + ret void +} + +define amdgpu_cs void @ttracedata_imm() { +; GFX11-LABEL: ttracedata_imm: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_ttracedata_imm 0x3e8 +; GFX11-NEXT: s_endpgm + call void @llvm.amdgcn.s.ttracedata.imm(i16 1000) + ret void +}