diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index 326d0fa58dd15..2fe9cd242ff19 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -290,14 +290,12 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage( case AMDGPU::SRC_PRIVATE_BASE: case AMDGPU::SRC_PRIVATE_LIMIT_LO: case AMDGPU::SRC_PRIVATE_LIMIT: + case AMDGPU::SRC_POPS_EXITING_WAVE_ID: case AMDGPU::SGPR_NULL: case AMDGPU::SGPR_NULL64: case AMDGPU::MODE: continue; - case AMDGPU::SRC_POPS_EXITING_WAVE_ID: - llvm_unreachable("src_pops_exiting_wave_id should not be used"); - case AMDGPU::NoRegister: assert(MI.isDebugInstr() && "Instruction uses invalid noreg register"); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll index f3c5ac757e22b..dbe95a8091932 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,SDAG,GFX9-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,SDAG,GFX10-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL + +declare void @foo(i32) define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) { ; SDAG-LABEL: test: @@ -34,35 +36,25 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) { } define amdgpu_ps void @test_loop() { -; SDAG-LABEL: test_loop: -; SDAG: ; %bb.0: -; SDAG-NEXT: .LBB1_1: ; %loop -; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 -; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id -; SDAG-NEXT: s_cmp_eq_u32 s0, 0 -; SDAG-NEXT: s_cbranch_scc1 .LBB1_1 -; SDAG-NEXT: ; %bb.2: ; %exit -; SDAG-NEXT: s_endpgm +; GFX9-LABEL: test_loop: +; GFX9: ; %bb.0: +; GFX9-NEXT: .LBB1_1: ; %loop +; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX9-NEXT: s_cmp_eq_u32 s0, 0 +; GFX9-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9-NEXT: ; %bb.2: ; %exit +; GFX9-NEXT: s_endpgm ; -; GFX9-GISEL-LABEL: test_loop: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: .LBB1_1: ; %loop -; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id -; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0 -; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1 -; GFX9-GISEL-NEXT: ; %bb.2: ; %exit -; GFX9-GISEL-NEXT: s_endpgm -; -; GFX10-GISEL-LABEL: test_loop: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: .LBB1_1: ; %loop -; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id -; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0 -; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1 -; GFX10-GISEL-NEXT: ; %bb.2: ; %exit -; GFX10-GISEL-NEXT: s_endpgm +; GFX10-LABEL: test_loop: +; GFX10: ; %bb.0: +; GFX10-NEXT: .LBB1_1: ; %loop +; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX10-NEXT: s_cmp_eq_u32 s0, 0 +; GFX10-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX10-NEXT: ; %bb.2: ; %exit +; GFX10-NEXT: s_endpgm br label %loop loop: %id = call i32 @llvm.amdgcn.pops.exiting.wave.id() @@ -117,3 +109,77 @@ exit: %id = phi i32 [ %id1, %entry ], [ %id2, %body ] ret i32 %id } + +define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { +; GFX9-SDAG-LABEL: test_call: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s2 +; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1] +; GFX9-SDAG-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 +; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 +; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-SDAG-NEXT: s_mov_b32 s6, src_pops_exiting_wave_id +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], 36 +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: test_call: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 +; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s2 +; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] +; GFX9-GISEL-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 +; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], 36 +; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX10-LABEL: test_call: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s2 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_getpc_b64 s[0:1] +; GFX10-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 +; GFX10-NEXT: s_mov_b64 s[8:9], 36 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id +; GFX10-NEXT: s_mov_b32 s32, 0 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.pops.exiting.wave.id() + call void @foo(i32 %id) + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX10-SDAG: {{.*}}