From 8632e8bd64d6f02e571777390274c262d5c85167 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 23 Sep 2024 13:20:21 +0400 Subject: [PATCH] AMDGPU: Fix implicit vcc def to vcc_lo on wave32 targets (#109514) --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 +- ...-divergent-i1-phis-no-lane-mask-merging.ll | 2 +- ...vergence-divergent-i1-used-outside-loop.ll | 6 +- .../atomic_optimizations_global_pointer.ll | 18 ++-- .../atomic_optimizations_local_pointer.ll | 74 ++++++++--------- .../AMDGPU/global_atomics_scan_fadd.ll | 82 +++++++++---------- .../AMDGPU/global_atomics_scan_fmax.ll | 22 ++--- .../AMDGPU/global_atomics_scan_fmin.ll | 22 ++--- .../AMDGPU/global_atomics_scan_fsub.ll | 82 +++++++++---------- .../CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll | 2 +- .../AMDGPU/set-inactive-wwm-overwrite.ll | 4 +- .../AMDGPU/should-not-hoist-set-inactive.ll | 2 +- llvm/test/CodeGen/AMDGPU/shrink-true16.mir | 2 +- .../shrink-v-cmp-wave32-dead-vcc-lo.mir | 55 +++++++++++++ llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 2 +- llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll | 8 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 4 +- 17 files changed, 219 insertions(+), 171 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-v-cmp-wave32-dead-vcc-lo.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 579b37c7e33961..9344d0c54ac1fc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4514,7 +4514,6 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, // of vcc was already added during the initial BuildMI, but we // 1) may need to change vcc to vcc_lo to preserve the original register // 2) have to preserve the original flags. - fixImplicitOperands(*Inst32); copyFlagsToImplicitVCC(*Inst32, *Src2); continue; } @@ -4524,7 +4523,7 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, } // FIXME: Losing implicit operands - + fixImplicitOperands(*Inst32); return Inst32; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll index bb7bc0447aea04..c5ded11c7d3234 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll @@ -167,8 +167,8 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: .LBB3_2: ; %loop_start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8 ; GFX10-NEXT: s_mov_b32 s7, 1 +; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8 ; GFX10-NEXT: s_cbranch_vccz .LBB3_4 ; GFX10-NEXT: ; %bb.3: ; %else ; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index 49c232661c6dc1..b27d8fdc24ff73 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -75,12 +75,12 @@ define void @divergent_i1_phi_used_outside_loop_larger_loop_body(float %val, ptr ; GFX10-NEXT: .LBB1_1: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-NEXT: v_add_co_u32 v1, s4, v1, 4 +; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s4, 0, v2, s4 -; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0 ; GFX10-NEXT: s_andn2_b32 s7, s5, exec_lo ; GFX10-NEXT: s_and_b32 s8, exec_lo, s6 +; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0 ; GFX10-NEXT: s_or_b32 s4, s7, s8 ; GFX10-NEXT: s_cbranch_vccz .LBB1_4 ; GFX10-NEXT: .LBB1_2: ; %loop.start @@ -191,9 +191,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts, ; GFX10-LABEL: divergent_i1_xor_used_outside_loop_larger_loop_body: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, -1 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB3_6 ; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll index b17dfc7c3754a1..ce608df44dc434 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -1323,9 +1323,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s0, s2 ; GFX1032_DPP-NEXT: s_mov_b32 s2, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2 @@ -1451,10 +1451,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s4, s6 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2 @@ -1587,9 +1586,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16 ; GFX1232_DPP-NEXT: s_wait_alu 0xfffe ; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1232_DPP-NEXT: s_mov_b32 s4, s6 ; GFX1232_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1232_DPP-NEXT: s_cbranch_execz .LBB2_2 @@ -3228,8 +3227,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s8, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s3, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s2, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB5_2 @@ -4991,9 +4990,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s0, s2 ; GFX1032_DPP-NEXT: s_mov_b32 s2, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB8_2 @@ -5119,10 +5118,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s4, s6 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB8_2 @@ -5255,9 +5253,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16 ; GFX1232_DPP-NEXT: s_wait_alu 0xfffe ; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1232_DPP-NEXT: s_mov_b32 s4, s6 ; GFX1232_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1232_DPP-NEXT: s_cbranch_execz .LBB8_2 @@ -6938,8 +6936,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace( ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s8, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s3, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s2, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB11_2 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 988bc8eec6e517..ce90fbed813103 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -936,8 +936,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2 @@ -1047,8 +1047,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2 @@ -2684,8 +2684,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s6, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s5, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB6_2 @@ -2874,8 +2874,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v1, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v2, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB6_2 @@ -3383,8 +3383,8 @@ define amdgpu_kernel void @add_i64_varying_nouse() { ; GFX1032_DPP-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 ; GFX1032_DPP-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_mbcnt_lo_u32_b32 v11, exec_lo, 0 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v9, v1 +; GFX1032_DPP-NEXT: v_mbcnt_lo_u32_b32 v11, exec_lo, 0 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032_DPP-NEXT: v_mov_b32_e32 v10, v2 ; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v11 @@ -4444,8 +4444,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB10_2 @@ -4555,8 +4555,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB10_2 @@ -6218,8 +6218,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s6, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s5, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB14_2 @@ -6408,8 +6408,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v1, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v2, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr8_vgpr9 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB14_2 @@ -6915,8 +6915,8 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB15_2 @@ -7026,9 +7026,8 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB15_2 @@ -7627,8 +7626,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v6, s5, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB16_2 @@ -7786,8 +7785,8 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v6, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB16_2 @@ -8294,8 +8293,8 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB17_2 @@ -8405,8 +8404,8 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB17_2 @@ -9006,8 +9005,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v6, s5, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB18_2 @@ -9165,8 +9164,8 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v6, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB18_2 @@ -9673,8 +9672,8 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB19_2 @@ -9784,8 +9783,8 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB19_2 @@ -10385,8 +10384,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v6, s5, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v5, s6, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB20_2 @@ -10544,8 +10543,8 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v6, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v5, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v7 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB20_2 @@ -11051,8 +11050,8 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB21_2 @@ -11162,9 +11161,8 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB21_2 @@ -12196,8 +12194,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s5, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s6, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB23_2 @@ -12415,8 +12413,8 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v2, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v1, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB23_2 @@ -12923,8 +12921,8 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB24_2 @@ -13034,9 +13032,8 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB24_2 @@ -14788,8 +14785,8 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB27_2 @@ -14899,8 +14896,8 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB27_2 @@ -15909,8 +15906,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s5, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s6, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB29_2 @@ -16125,8 +16122,8 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v2, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v1, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB29_2 @@ -16633,8 +16630,8 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB30_2 @@ -16744,9 +16741,8 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: s_or_saveexec_b32 s0, -1 ; GFX1132_DPP-NEXT: v_writelane_b32 v3, s1, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB30_2 @@ -17754,8 +17750,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1032_DPP-NEXT: v_writelane_b32 v2, s5, 16 ; GFX1032_DPP-NEXT: v_writelane_b32 v1, s6, 16 ; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1032_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032_DPP-NEXT: s_cbranch_execz .LBB32_2 @@ -17970,8 +17966,8 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) { ; GFX1132_DPP-NEXT: v_writelane_b32 v2, s5, 16 ; GFX1132_DPP-NEXT: v_writelane_b32 v1, s6, 16 ; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4 -; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1132_DPP-NEXT: s_mov_b32 s6, -1 +; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9 ; GFX1132_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10 ; GFX1132_DPP-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1132_DPP-NEXT: s_cbranch_execz .LBB32_2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 2b18f472c8c402..c3a197ce99859f 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -1263,16 +1263,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB2_3 ; GFX1032-NEXT: ; %bb.1: @@ -1483,16 +1483,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -2471,16 +2471,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB4_3 ; GFX1032-NEXT: ; %bb.1: @@ -2721,16 +2721,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_agent_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -4503,16 +4503,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_uni_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB7_3 ; GFX1032-NEXT: ; %bb.1: @@ -4753,16 +4753,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_uni_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_uni_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -5929,19 +5929,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: s_mov_b32 s33, s8 ; GFX1032-NEXT: s_mov_b32 s8, exec_lo -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s48, s48, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-NEXT: s_mov_b32 s44, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-NEXT: ; %bb.1: @@ -6378,19 +6378,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s8 ; GFX1032-DPP-NEXT: s_mov_b32 s8, exec_lo -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -7595,8 +7595,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -8020,16 +8020,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB11_3 ; GFX1032-NEXT: ; %bb.1: @@ -8277,16 +8277,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -9107,8 +9107,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: s_mov_b32 s2, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 @@ -9444,16 +9444,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB13_3 ; GFX1032-NEXT: ; %bb.1: @@ -9701,16 +9701,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -10531,8 +10531,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: s_mov_b32 s2, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 @@ -11437,8 +11437,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: s_mov_b32 s2, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 @@ -13574,8 +13574,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll index e3144ae24ae8d6..69c6adf0300c22 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll @@ -3348,17 +3348,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-NEXT: s_mov_b32 s44, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-NEXT: ; %bb.1: @@ -3778,17 +3778,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -5038,8 +5038,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] ; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -6403,8 +6403,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] ; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo @@ -6844,17 +6844,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-NEXT: s_mov_b32 s44, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-NEXT: ; %bb.1: @@ -7274,17 +7274,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -8534,8 +8534,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] ; GFX1032-DPP-NEXT: v_max_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll index ddc103184cdf35..b7890f30f77603 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll @@ -3348,17 +3348,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-NEXT: s_mov_b32 s44, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-NEXT: ; %bb.1: @@ -3778,17 +3778,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -5038,8 +5038,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] ; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -6403,8 +6403,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] ; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo @@ -6844,17 +6844,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-NEXT: s_mov_b32 s44, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-NEXT: ; %bb.1: @@ -7274,17 +7274,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -8534,8 +8534,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] ; GFX1032-DPP-NEXT: v_min_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index f353edff1b477a..fcd5d0dc497e67 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -1367,16 +1367,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB2_3 ; GFX1032-NEXT: ; %bb.1: @@ -1617,16 +1617,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB2_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -2687,16 +2687,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB4_3 ; GFX1032-NEXT: ; %bb.1: @@ -2937,16 +2937,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB4_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -4823,16 +4823,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_uni_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB7_3 ; GFX1032-NEXT: ; %bb.1: @@ -5073,16 +5073,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_uni_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_uni_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -6249,19 +6249,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: s_mov_b32 s33, s8 ; GFX1032-NEXT: s_mov_b32 s8, exec_lo -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s48, s48, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-NEXT: s_mov_b32 s44, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-NEXT: ; %bb.1: @@ -6698,19 +6698,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s8 ; GFX1032-DPP-NEXT: s_mov_b32 s8, exec_lo -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s8, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -7915,8 +7915,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 @@ -8340,16 +8340,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB11_3 ; GFX1032-NEXT: ; %bb.1: @@ -8597,16 +8597,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -9426,8 +9426,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: s_mov_b32 s2, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 @@ -9763,16 +9763,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-NEXT: s_mov_b32 s14, -1 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB13_3 ; GFX1032-NEXT: ; %bb.1: @@ -10020,16 +10020,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GFX1032-DPP-NEXT: s_mov_b32 s14, -1 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s15, 0x31c16000 ; GFX1032-DPP-NEXT: s_add_u32 s12, s12, s9 -; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_addc_u32 s13, s13, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s4, 0 +; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB13_3 ; GFX1032-DPP-NEXT: ; %bb.1: @@ -10850,8 +10850,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: s_mov_b32 s2, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 @@ -11756,8 +11756,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_permlanex16_b32 v5, v3, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], v[3:4], v[5:6] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, v3 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-DPP-NEXT: s_mov_b32 s2, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 @@ -13892,8 +13892,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_permlanex16_b32 v10, v8, 0, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[8:9], v[8:9], v[10:11] ; GFX1032-DPP-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 +; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 ; GFX1032-DPP-NEXT: s_mov_b32 s44, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll index 684ca3aac7c315..004a720b9ab486 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll @@ -216,8 +216,8 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) { ; GFX10-32-NEXT: s_mov_b32 s1, exec_lo ; GFX10-32-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-32-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX10-32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 +; GFX10-32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_and_saveexec_b32 s2, s0 ; GFX10-32-NEXT: s_xor_b32 s0, exec_lo, s2 ; GFX10-32-NEXT: s_cbranch_execz .LBB2_3 diff --git a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll index f60786c1bacbff..6f841c88a6d8bb 100644 --- a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll +++ b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll @@ -4,8 +4,8 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) inreg %output, <3 x i32> %LocalInvocationId) { ; GCN-LABEL: if_then: ; GCN: ; %bb.0: ; %.entry -; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GCN-NEXT: ; %bb.1: ; %.bb0 ; GCN-NEXT: v_mov_b32_e32 v3, 1 @@ -60,8 +60,8 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i define amdgpu_cs void @if_else_vgpr_opt(ptr addrspace(8) inreg %input, ptr addrspace(8) inreg %output, <3 x i32> %LocalInvocationId) { ; GCN-LABEL: if_else_vgpr_opt: ; GCN: ; %bb.0: ; %.entry -; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GCN-NEXT: ; %bb.1: ; %.bb0 ; GCN-NEXT: v_mov_b32_e32 v3, 1 diff --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll index 90b32e29e98f67..3519befabd3bc7 100644 --- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll @@ -4,10 +4,10 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 inreg %v, i32 %lane, i32 %f, i32 %f2) #0 { ; GCN-LABEL: should_not_hoist_set_inactive: ; GCN: ; %bb.0: ; %.entry -; GCN-NEXT: v_cmp_gt_i32_e32 vcc_lo, 3, v1 ; GCN-NEXT: v_cmp_eq_u32_e64 s5, 0, v0 ; GCN-NEXT: v_cmp_ne_u32_e64 s6, 0, v2 ; GCN-NEXT: s_mov_b32 s7, 0 +; GCN-NEXT: v_cmp_gt_i32_e32 vcc_lo, 3, v1 ; GCN-NEXT: s_branch .LBB0_2 ; GCN-NEXT: .LBB0_1: ; %bb4 ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 diff --git a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir index 1a7ec5db9efa2a..be759049bc3a7d 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-true16.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-true16.mir @@ -11,7 +11,7 @@ body: | ; GFX1100-LABEL: name: 16bit_lo128_shrink ; GFX1100: liveins: $vgpr127 ; GFX1100-NEXT: {{ $}} - ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc, implicit $exec, implicit $exec + ; GFX1100-NEXT: V_CMP_EQ_U16_t16_e32 0, $vgpr127, implicit-def $vcc_lo, implicit $exec, implicit $exec $vcc_lo = V_CMP_EQ_U16_t16_e64 0, $vgpr127, implicit-def $vcc, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/shrink-v-cmp-wave32-dead-vcc-lo.mir b/llvm/test/CodeGen/AMDGPU/shrink-v-cmp-wave32-dead-vcc-lo.mir new file mode 100644 index 00000000000000..73c55265af20b9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-v-cmp-wave32-dead-vcc-lo.mir @@ -0,0 +1,55 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-shrink-instructions -mcpu=gfx1100 -o - %s | FileCheck %s + +# Make sure there's no crash when shrinking a v_cmp on a wave32 target +# when the def is dead. Previously the vcc implicit def wasn't +# properly replaced with vcc_lo, so the expected implicit operand was +# not found in the shrunk instruction. + +--- +name: shrink_v_cmp_vcc_lo_dead +tracksRegLiveness: true +tracksDebugUserValues: true +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 + isCalleeSavedInfoValid: true +machineFunctionInfo: + maxKernArgAlign: 1 + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: shrink_v_cmp_vcc_lo_dead + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: V_CMP_LT_U32_e32 $vgpr0, $vgpr1, implicit-def dead $vcc_lo, implicit $exec + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 + dead renamable $vcc_lo = V_CMP_LT_U32_e64 $vgpr0, $vgpr1, implicit $exec + S_SETPC_B64_return undef $sgpr30_sgpr31 + +... + +--- +name: shrink_v_cmp_vcc_lo_live +tracksRegLiveness: true +tracksDebugUserValues: true +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 + isCalleeSavedInfoValid: true +machineFunctionInfo: + maxKernArgAlign: 1 + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: shrink_v_cmp_vcc_lo_live + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: V_CMP_LT_U32_e32 $vgpr0, $vgpr1, implicit-def $vcc_lo, implicit $exec + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vcc_lo + renamable $vcc_lo = V_CMP_LT_U32_e64 $vgpr0, $vgpr1, implicit $exec + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vcc_lo + +... diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index eebd32cd67e6e6..8e0a83671a1837 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1027,8 +1027,8 @@ define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 { ; ; GFX10-WAVE32-LABEL: test_kill_divergent_loop: ; GFX10-WAVE32: ; %bb.0: ; %entry -; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo +; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll index 25d8300eb45835..a0bce3432a4bd0 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll @@ -86,8 +86,8 @@ end: define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 { ; SI-LABEL: else3: ; SI: ; %bb.0: ; %entry -; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0 ; SI-NEXT: s_mov_b32 s1, 0 +; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0 ; SI-NEXT: s_branch .LBB2_2 ; SI-NEXT: .LBB2_1: ; %if.end ; SI-NEXT: ; in Loop: Header=BB2_2 Depth=1 @@ -161,16 +161,16 @@ for.end: define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_func, ptr %extern_func2) #0 { ; SI-LABEL: loop: ; SI: ; %bb.0: ; %main_body -; SI-NEXT: v_mov_b32_e32 v6, v0 ; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; SI-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: v_mov_b32_e32 v6, v0 ; SI-NEXT: v_mov_b32_e32 v0, v1 -; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6 ; SI-NEXT: s_mov_b32 s15, 0x31c16000 ; SI-NEXT: s_add_u32 s12, s12, s1 ; SI-NEXT: s_addc_u32 s13, s13, 0 ; SI-NEXT: s_mov_b32 s32, 0 +; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6 ; SI-NEXT: ; implicit-def: $vgpr1 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo ; SI-NEXT: s_xor_b32 s6, exec_lo, s0 @@ -243,11 +243,11 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; SI-NEXT: s_mov_b32 s14, -1 ; SI-NEXT: v_mov_b32_e32 v40, v1 -; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0 ; SI-NEXT: s_mov_b32 s15, 0x31c16000 ; SI-NEXT: s_add_u32 s12, s12, s1 ; SI-NEXT: s_addc_u32 s13, s13, 0 ; SI-NEXT: s_mov_b32 s32, 0 +; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0 ; SI-NEXT: ; implicit-def: $vgpr0 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo ; SI-NEXT: s_xor_b32 s6, exec_lo, s0 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 92117e0688f65c..4576d829b0cb0a 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -372,8 +372,8 @@ define amdgpu_kernel void @test_loop_with_if(ptr addrspace(1) %arg) #0 { ; GFX1032-NEXT: .LBB10_2: ; %bb2 ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: v_cmp_ge_i32_e64 s4, v1, v0 -; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v1, v0 ; GFX1032-NEXT: s_mov_b32 s3, 0 +; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v1, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s5, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_4 ; GFX1032-NEXT: ; %bb.3: ; %bb5 @@ -515,8 +515,8 @@ bb13: define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #0 { ; GFX1032-LABEL: test_loop_with_if_else_break: ; GFX1032: ; %bb.0: ; %bb -; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB11_6 ; GFX1032-NEXT: ; %bb.1: ; %.preheader