Skip to content

Commit

Permalink
AMDGPU: Fix implicit vcc def to vcc_lo on wave32 targets (llvm#109514)
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Sep 23, 2024
1 parent f28a035 commit 8632e8b
Show file tree
Hide file tree
Showing 17 changed files with 219 additions and 171 deletions.
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4514,7 +4514,6 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
// of vcc was already added during the initial BuildMI, but we
// 1) may need to change vcc to vcc_lo to preserve the original register
// 2) have to preserve the original flags.
fixImplicitOperands(*Inst32);
copyFlagsToImplicitVCC(*Inst32, *Src2);
continue;
}
Expand All @@ -4524,7 +4523,7 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
}

// FIXME: Losing implicit operands

fixImplicitOperands(*Inst32);
return Inst32;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa
; GFX10-NEXT: s_cbranch_execz .LBB3_6
; GFX10-NEXT: .LBB3_2: ; %loop_start
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8
; GFX10-NEXT: s_mov_b32 s7, 1
; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8
; GFX10-NEXT: s_cbranch_vccz .LBB3_4
; GFX10-NEXT: ; %bb.3: ; %else
; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ define void @divergent_i1_phi_used_outside_loop_larger_loop_body(float %val, ptr
; GFX10-NEXT: .LBB1_1: ; %loop.cond
; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-NEXT: v_add_co_u32 v1, s4, v1, 4
; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0
; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s4, 0, v2, s4
; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0
; GFX10-NEXT: s_andn2_b32 s7, s5, exec_lo
; GFX10-NEXT: s_and_b32 s8, exec_lo, s6
; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0
; GFX10-NEXT: s_or_b32 s4, s7, s8
; GFX10-NEXT: s_cbranch_vccz .LBB1_4
; GFX10-NEXT: .LBB1_2: ; %loop.start
Expand Down Expand Up @@ -191,9 +191,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-LABEL: divergent_i1_xor_used_outside_loop_larger_loop_body:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: s_mov_b32 s5, 0
; GFX10-NEXT: s_mov_b32 s6, -1
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB3_6
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
Expand Down
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1323,9 +1323,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2
Expand Down Expand Up @@ -1451,10 +1451,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
Expand Down Expand Up @@ -1587,9 +1586,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1232_DPP-NEXT: s_wait_alu 0xfffe
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB2_2
Expand Down Expand Up @@ -3228,8 +3227,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: v_writelane_b32 v2, s8, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v1, s3, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB5_2
Expand Down Expand Up @@ -4991,9 +4990,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB8_2
Expand Down Expand Up @@ -5119,10 +5118,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB8_2
Expand Down Expand Up @@ -5255,9 +5253,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
; GFX1232_DPP-NEXT: s_wait_alu 0xfffe
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB8_2
Expand Down Expand Up @@ -6938,8 +6936,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1032_DPP-NEXT: v_writelane_b32 v2, s8, 16
; GFX1032_DPP-NEXT: v_writelane_b32 v1, s3, 16
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB11_2
Expand Down
Loading

0 comments on commit 8632e8b

Please sign in to comment.