Skip to content

Commit

Permalink
Avoid module pass assert when compiling for O0
Browse files Browse the repository at this point in the history
Change-Id: Ife52a918a9f9e7ba4980168693a2dbffb13e6e87
  • Loading branch information
JanekvO authored and bcahoon committed Feb 22, 2024
1 parent 1af3232 commit 81b5221
Show file tree
Hide file tree
Showing 13 changed files with 243 additions and 124 deletions.
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1267,6 +1267,14 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
/// Declares the pass dependencies of the AMDGPU assembly printer: it requires
/// and preserves both AMDGPUResourceUsageAnalysis and DummyCGSCCPass, then
/// forwards \p AU to AsmPrinter::getAnalysisUsage.
void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AMDGPUResourceUsageAnalysis>();
AU.addPreserved<AMDGPUResourceUsageAnalysis>();

// The Dummy pass is necessary because AMDGPUResourceUsageAnalysis will pop
// the CGSCC pass manager off the stack of active pass managers. Requiring the
// Dummy pass re-inserts the CGSCC pass manager into that stack through
// CallGraphSCCPass::assignPassManager.
// NOTE(review): presumably the request order matters (resource usage analysis
// first, then the Dummy pass) -- confirm before reordering these calls.
AU.addRequired<DummyCGSCCPass>();
AU.addPreserved<DummyCGSCCPass>();

// Hand the same AnalysisUsage to the base-class implementation.
AsmPrinter::getAnalysisUsage(AU);
}

Expand Down
15 changes: 11 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,17 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {

SIFunctionResourceInfo &Info = CI.first->second;
MachineFunction *MF = MMI.getMachineFunction(*F);
assert(MF && "function must have been generated already");
Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
AssumedStackSizeForExternalCall);
HasIndirectCall |= Info.HasIndirectCall;
// We can only analyze resource usage of functions for which a corresponding
// MachineFunction exists. One may be missing because the (codegen) passes
// prior to this one are run in CGSCC order, which bypasses any local
// functions that aren't called.
assert((MF || TPC->requiresCodeGenSCCOrder()) &&
"function must have been generated already");
if (MF) {
Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
AssumedStackSizeForExternalCall);
HasIndirectCall |= Info.HasIndirectCall;
}
}

if (HasIndirectCall)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
; GCN: callee_kernel:
; GCN: s_endpgm
; GCN: __amdgpu_callee_kernel_kernel_body
; GCN: s_setpc_b64
define amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) #0 {
entry:
store volatile i32 0, i32 addrspace(1)* %out
Expand All @@ -13,8 +15,6 @@ entry:
; GCN: s_addc_u32 s[[HI2:[0-9]+]], s[[HI1]], __amdgpu_callee_kernel_kernel_body@rel32@hi+12
; GCN: s_swappc_b64 s[{{[0-9:]+}}], s{{\[}}[[LO2]]:[[HI2]]]
; GCN: s_endpgm
; GCN: __amdgpu_callee_kernel_kernel_body
; GCN: s_setpc_b64
define amdgpu_kernel void @caller_kernel(i32 addrspace(1)* %out) #0 {
entry:
call void @callee_kernel(i32 addrspace(1)* %out)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/call-to-kernel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

; GCN: callee_kernel:
; GCN: s_endpgm
; GCN: __amdgpu_callee_kernel_kernel_body
; GCN: s_setpc_b64
define amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) #0 {
entry:
store volatile i32 0, i32 addrspace(1)* %out
Expand All @@ -14,8 +16,6 @@ entry:
; GCN: s_addc_u32 s[[HI2:[0-9]+]], s[[HI1]], __amdgpu_callee_kernel_kernel_body@rel32@hi+12
; GCN: s_swappc_b64 s[{{[0-9:]+}}], s{{\[}}[[LO2]]:[[HI2]]]
; GCN: s_endpgm
; GCN: __amdgpu_callee_kernel_kernel_body
; GCN: s_setpc_b64
define amdgpu_kernel void @caller_kernel(i32 addrspace(1)* %out) #0 {
entry:
call amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out)
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@

declare i32 @llvm.amdgcn.workitem.id.x()

; Returns an all-zero <2 x i64>. The GFX11 check lines expect v0-v3 to each be
; written with 0 before the s_setpc_b64 return.
define <2 x i64> @f1() #0 {
; GFX11-LABEL: f1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: v_mov_b32_e32 v3, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret <2 x i64> zeroinitializer
}

define void @f0() {
; GFX11-LABEL: f0:
; GFX11: ; %bb.0: ; %bb
Expand Down Expand Up @@ -35,18 +47,6 @@ bb:
ret void
}

; Returns an all-zero <2 x i64>. The GFX11 check lines expect v0-v3 to each be
; written with 0 before the s_setpc_b64 return.
define <2 x i64> @f1() #0 {
; GFX11-LABEL: f1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: v_mov_b32_e32 v3, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret <2 x i64> zeroinitializer
}

; FIXME: This generates "instid1(/* invalid instid value */)".
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
; GFX11-LABEL: f2:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/ipra.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,6 @@ define void @test_funcx2() #0 {
ret void
}

; Weak kernel whose body is a single call to @hoge followed by a return; only
; the kernel's label is checked here.
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
call void @hoge() #0
ret void
}

; Make sure we save/restore the return address around the call.
; Function Attrs: norecurse
define internal void @hoge() #2 {
Expand All @@ -128,6 +121,13 @@ bb:
ret void
}

; Weak kernel whose body is a single call to @hoge followed by a return; only
; the kernel's label is checked here.
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
call void @hoge() #0
ret void
}

declare dso_local void @eggs()


Expand Down
65 changes: 40 additions & 25 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,14 @@
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: Stack Frame Layout Analysis
; GCN-O0-NEXT: Function register usage analysis
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: AMDGPU Assembly Printer
; GCN-O0-NEXT: Free MachineFunction
; GCN-O0-NEXT: CallGraph Construction
; GCN-O0-NEXT: Call Graph SCC Pass Manager
; GCN-O0-NEXT: DummyCGSCCPass
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: AMDGPU Assembly Printer
; GCN-O0-NEXT: Free MachineFunction

; GCN-O1:Target Library Information
; GCN-O1-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -412,11 +415,14 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Stack Frame Layout Analysis
; GCN-O1-NEXT: Function register usage analysis
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: AMDGPU Assembly Printer
; GCN-O1-NEXT: Free MachineFunction
; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Call Graph SCC Pass Manager
; GCN-O1-NEXT: DummyCGSCCPass
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: AMDGPU Assembly Printer
; GCN-O1-NEXT: Free MachineFunction

; GCN-O1-OPTS:Target Library Information
; GCN-O1-OPTS-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -709,11 +715,14 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Stack Frame Layout Analysis
; GCN-O1-OPTS-NEXT: Function register usage analysis
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
; GCN-O1-OPTS-NEXT: Free MachineFunction
; GCN-O1-OPTS-NEXT: CallGraph Construction
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
; GCN-O1-OPTS-NEXT: DummyCGSCCPass
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
; GCN-O1-OPTS-NEXT: Free MachineFunction

; GCN-O2:Target Library Information
; GCN-O2-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -1012,11 +1021,14 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Stack Frame Layout Analysis
; GCN-O2-NEXT: Function register usage analysis
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: AMDGPU Assembly Printer
; GCN-O2-NEXT: Free MachineFunction
; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Call Graph SCC Pass Manager
; GCN-O2-NEXT: DummyCGSCCPass
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: AMDGPU Assembly Printer
; GCN-O2-NEXT: Free MachineFunction

; GCN-O3:Target Library Information
; GCN-O3-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -1327,11 +1339,14 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Stack Frame Layout Analysis
; GCN-O3-NEXT: Function register usage analysis
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: AMDGPU Assembly Printer
; GCN-O3-NEXT: Free MachineFunction
; GCN-O3-NEXT: CallGraph Construction
; GCN-O3-NEXT: Call Graph SCC Pass Manager
; GCN-O3-NEXT: DummyCGSCCPass
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: AMDGPU Assembly Printer
; GCN-O3-NEXT: Free MachineFunction

define void @empty() {
ret void
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@
@lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
@lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16

; Non-kernel function that stores the byte 3 to @lds.size.1.align.1. The OPT
; check lines expect that store to target @llvm.amdgcn.module.lds instead, and
; the GCN check lines expect a ds_write_b8 using registers holding 0 and 3.
; GCN-LABEL: {{^}}f0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
; GCN: ds_write_b8 [[NULL]], [[TREE]]
define void @f0() {
; OPT-LABEL: @f0(
; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
; OPT-NEXT: ret void
;
store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
ret void
}

; GCN-LABEL: {{^}}k0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
Expand All @@ -29,16 +42,3 @@ define amdgpu_kernel void @k0() {
call void @f0()
ret void
}

; Non-kernel function that stores the byte 3 to @lds.size.1.align.1. The OPT
; check lines expect that store to target @llvm.amdgcn.module.lds instead, and
; the GCN check lines expect a ds_write_b8 using registers holding 0 and 3.
; GCN-LABEL: {{^}}f0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
; GCN: ds_write_b8 [[NULL]], [[TREE]]
define void @f0() {
; OPT-LABEL: @f0() {
; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
; OPT-NEXT: ret void
;
store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
ret void
}
98 changes: 49 additions & 49 deletions llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,55 @@ store i32 0, ptr addrspace(3) @used_by_kernel
}
; CHECK: ; LDSByteSize: 4 bytes

; Non-kernel function that stores i32 0 to @used_by_both and double 0.0 to
; @used_by_function. Every check configuration below expects a 32-bit LDS
; write at address 8 and a 64-bit LDS write at address 0.
define void @nonkernel() {
; GFX9-LABEL: nonkernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
; GFX9-NEXT: ds_write_b64 v0, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: nonkernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX9-LABEL: nonkernel:
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX9-NEXT: v_mov_b32_e32 v2, 0
; G_GFX9-NEXT: v_mov_b32_e32 v3, 8
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0
; G_GFX9-NEXT: ds_write_b32 v3, v2
; G_GFX9-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX10-LABEL: nonkernel:
; G_GFX10: ; %bb.0:
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
; G_GFX10-NEXT: v_mov_b32_e32 v1, 0
; G_GFX10-NEXT: ds_write_b32 v3, v2
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: s_setpc_b64 s[30:31]
store i32 0, ptr addrspace(3) @used_by_both
store double 0.0, ptr addrspace(3) @used_by_function
ret void
}

; Needs to allocate both variables, store to used_by_both is at sizeof(double)
define amdgpu_kernel void @withcall() {
; GFX9-LABEL: withcall:
Expand Down Expand Up @@ -140,54 +189,5 @@ define amdgpu_kernel void @nocall_false_sharing() {
; CHECK: ; LDSByteSize: 4 bytes


; Non-kernel function that stores i32 0 to @used_by_both and double 0.0 to
; @used_by_function. Every check configuration below expects a 32-bit LDS
; write at address 8 and a 64-bit LDS write at address 0.
define void @nonkernel() {
; GFX9-LABEL: nonkernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
; GFX9-NEXT: ds_write_b64 v0, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: nonkernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX9-LABEL: nonkernel:
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX9-NEXT: v_mov_b32_e32 v2, 0
; G_GFX9-NEXT: v_mov_b32_e32 v3, 8
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0
; G_GFX9-NEXT: ds_write_b32 v3, v2
; G_GFX9-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX10-LABEL: nonkernel:
; G_GFX10: ; %bb.0:
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
; G_GFX10-NEXT: v_mov_b32_e32 v1, 0
; G_GFX10-NEXT: ds_write_b32 v3, v2
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: s_setpc_b64 s[30:31]
store i32 0, ptr addrspace(3) @used_by_both
store double 0.0, ptr addrspace(3) @used_by_function
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4

; GCN-LABEL: unreachable:
; GCN-NOT: unreachable:
; Function info:
; codeLenInByte = 4
define internal fastcc void @unreachable() {
Expand Down
Loading

0 comments on commit 81b5221

Please sign in to comment.