From 81b52215d093157d65fae7326efb4aeb7beefb0e Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Fri, 12 Jan 2024 14:24:02 -0800 Subject: [PATCH] Avoid module pass assert when compiling for O0 Change-Id: Ife52a918a9f9e7ba4980168693a2dbffb13e6e87 --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 8 ++ .../AMDGPU/AMDGPUResourceUsageAnalysis.cpp | 15 ++- .../AMDGPU/call-to-kernel-undefined.ll | 4 +- llvm/test/CodeGen/AMDGPU/call-to-kernel.ll | 4 +- .../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 24 ++--- llvm/test/CodeGen/AMDGPU/ipra.ll | 14 +-- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 65 +++++++----- .../AMDGPU/lower-module-lds-offsets.ll | 26 ++--- .../AMDGPU/module-lds-false-sharing.ll | 98 +++++++++---------- .../AMDGPU/resource-usage-dead-function.ll | 2 +- .../AMDGPU/uncalled-local-functions.ll | 89 +++++++++++++++++ .../AMDGPU/heterogeneous-dwarf-globals-dwo.ll | 2 +- .../AMDGPU/heterogeneous-dwarf-globals.ll | 16 +-- 13 files changed, 243 insertions(+), 124 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 37a36b26b947c6..c7a7b6114364b0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1267,6 +1267,14 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); + + // The Dummy pass is necessary because AMDGPUResourceUsageAnalysis will pop + // the CGSCC pass manager off of the active pass managers stack. Adding the + // Dummy pass will re-insert the CGSCC pass manager into said stack again + // through CallGraphSCCPass::assignPassManager. + AU.addRequired(); + AU.addPreserved(); + AsmPrinter::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index 326d0fa58dd15f..8a60dee6b5cb1b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -155,10 +155,17 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) { SIFunctionResourceInfo &Info = CI.first->second; MachineFunction *MF = MMI.getMachineFunction(*F); - assert(MF && "function must have been generated already"); - Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects, - AssumedStackSizeForExternalCall); - HasIndirectCall |= Info.HasIndirectCall; + // We can only analyze resource usage of functions for which there exists a + // machinefunction equivalent. These may not exist as the (codegen) passes + // prior to this one are run in CGSCC order which will bypass any local + // functions that aren't called. + assert((MF || TPC->requiresCodeGenSCCOrder()) && + "function must have been generated already"); + if (MF) { + Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects, + AssumedStackSizeForExternalCall); + HasIndirectCall |= Info.HasIndirectCall; + } } if (HasIndirectCall) diff --git a/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll b/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll index d8e1eb911cf49a..203d51eba9db71 100644 --- a/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll +++ b/llvm/test/CodeGen/AMDGPU/call-to-kernel-undefined.ll @@ -1,6 +1,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s ; GCN: callee_kernel: ; GCN: s_endpgm +; GCN: __amdgpu_callee_kernel_kernel_body +; GCN: s_setpc_b64 define amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) #0 { entry: store volatile i32 0, i32 addrspace(1)* %out @@ -13,8 +15,6 @@ entry: ; GCN: s_addc_u32 s[[HI2:[0-9]+]], s[[HI1]], __amdgpu_callee_kernel_kernel_body@rel32@hi+12 ; GCN: s_swappc_b64 s[{{[0-9:]+}}], s{{\[}}[[LO2]]:[[HI2]]] ; GCN: s_endpgm -; GCN: __amdgpu_callee_kernel_kernel_body -; GCN: s_setpc_b64 define amdgpu_kernel void @caller_kernel(i32 addrspace(1)* %out) #0 { entry: call void @callee_kernel(i32 addrspace(1)* %out) diff --git a/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll b/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll index 613c62c42f3cdf..cd99eb75405a4a 100644 --- a/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/call-to-kernel.ll @@ -2,6 +2,8 @@ ; GCN: callee_kernel: ; GCN: s_endpgm +; GCN: __amdgpu_callee_kernel_kernel_body +; GCN: s_setpc_b64 define amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) #0 { entry: store volatile i32 0, i32 addrspace(1)* %out @@ -14,8 +16,6 @@ entry: ; GCN: s_addc_u32 s[[HI2:[0-9]+]], s[[HI1]], __amdgpu_callee_kernel_kernel_body@rel32@hi+12 ; GCN: s_swappc_b64 s[{{[0-9:]+}}], s{{\[}}[[LO2]]:[[HI2]]] ; GCN: s_endpgm -; GCN: __amdgpu_callee_kernel_kernel_body -; GCN: s_setpc_b64 define amdgpu_kernel void @caller_kernel(i32 addrspace(1)* %out) #0 { entry: call amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index d20f726549f92d..92c27032f4e041 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -3,6 +3,18 @@ declare i32 @llvm.amdgcn.workitem.id.x() +define <2 x i64> @f1() #0 { +; GFX11-LABEL: f1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: v_mov_b32_e32 v3, 0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + ret <2 x i64> zeroinitializer +} + define void @f0() { ; GFX11-LABEL: f0: ; GFX11: ; %bb.0: ; %bb @@ -35,18 +47,6 @@ bb: ret void } -define <2 x i64> @f1() #0 { -; GFX11-LABEL: f1: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-NEXT: v_mov_b32_e32 v3, 0 -; GFX11-NEXT: s_setpc_b64 s[30:31] - ret <2 x i64> zeroinitializer -} - ; FIXME: This generates "instid1(/* invalid instid value */)". define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) { ; GFX11-LABEL: f2: diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll index 6c8646968b6762..ce3494f7391d6c 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -105,13 +105,6 @@ define void @test_funcx2() #0 { ret void } -; GCN-LABEL: {{^}}wombat: -define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) { -bb: - call void @hoge() #0 - ret void -} - ; Make sure we save/restore the return address around the call. ; Function Attrs: norecurse define internal void @hoge() #2 { @@ -128,6 +121,13 @@ bb: ret void } +; GCN-LABEL: {{^}}wombat: +define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) { +bb: + call void @hoge() #0 + ret void +} + declare dso_local void @eggs() diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 8fb9655f25223d..a1c985f264d10b 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -143,11 +143,14 @@ ; GCN-O0-NEXT: Machine Optimization Remark Emitter ; GCN-O0-NEXT: Stack Frame Layout Analysis ; GCN-O0-NEXT: Function register usage analysis -; GCN-O0-NEXT: FunctionPass Manager -; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O0-NEXT: Machine Optimization Remark Emitter -; GCN-O0-NEXT: AMDGPU Assembly Printer -; GCN-O0-NEXT: Free MachineFunction +; GCN-O0-NEXT: CallGraph Construction +; GCN-O0-NEXT: Call Graph SCC Pass Manager +; GCN-O0-NEXT: DummyCGSCCPass +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O0-NEXT: Machine Optimization Remark Emitter +; GCN-O0-NEXT: AMDGPU Assembly Printer +; GCN-O0-NEXT: Free MachineFunction ; GCN-O1:Target Library Information ; GCN-O1-NEXT:Target Pass Configuration @@ -412,11 +415,14 @@ ; GCN-O1-NEXT: Machine Optimization Remark Emitter ; GCN-O1-NEXT: Stack Frame Layout Analysis ; GCN-O1-NEXT: Function register usage analysis -; GCN-O1-NEXT: FunctionPass Manager -; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O1-NEXT: Machine Optimization Remark Emitter -; GCN-O1-NEXT: AMDGPU Assembly Printer -; GCN-O1-NEXT: Free MachineFunction +; GCN-O1-NEXT: CallGraph Construction +; GCN-O1-NEXT: Call Graph SCC Pass Manager +; GCN-O1-NEXT: DummyCGSCCPass +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Machine Optimization Remark Emitter +; GCN-O1-NEXT: AMDGPU Assembly Printer +; GCN-O1-NEXT: Free MachineFunction ; GCN-O1-OPTS:Target Library Information ; GCN-O1-OPTS-NEXT:Target Pass Configuration @@ -709,11 +715,14 @@ ; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter ; GCN-O1-OPTS-NEXT: Stack Frame Layout Analysis ; GCN-O1-OPTS-NEXT: Function register usage analysis -; GCN-O1-OPTS-NEXT: FunctionPass Manager -; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter -; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer -; GCN-O1-OPTS-NEXT: Free MachineFunction +; GCN-O1-OPTS-NEXT: CallGraph Construction +; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager +; GCN-O1-OPTS-NEXT: DummyCGSCCPass +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter +; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer +; GCN-O1-OPTS-NEXT: Free MachineFunction ; GCN-O2:Target Library Information ; GCN-O2-NEXT:Target Pass Configuration @@ -1012,11 +1021,14 @@ ; GCN-O2-NEXT: Machine Optimization Remark Emitter ; GCN-O2-NEXT: Stack Frame Layout Analysis ; GCN-O2-NEXT: Function register usage analysis -; GCN-O2-NEXT: FunctionPass Manager -; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O2-NEXT: Machine Optimization Remark Emitter -; GCN-O2-NEXT: AMDGPU Assembly Printer -; GCN-O2-NEXT: Free MachineFunction +; GCN-O2-NEXT: CallGraph Construction +; GCN-O2-NEXT: Call Graph SCC Pass Manager +; GCN-O2-NEXT: DummyCGSCCPass +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Optimization Remark Emitter +; GCN-O2-NEXT: AMDGPU Assembly Printer +; GCN-O2-NEXT: Free MachineFunction ; GCN-O3:Target Library Information ; GCN-O3-NEXT:Target Pass Configuration @@ -1327,11 +1339,14 @@ ; GCN-O3-NEXT: Machine Optimization Remark Emitter ; GCN-O3-NEXT: Stack Frame Layout Analysis ; GCN-O3-NEXT: Function register usage analysis -; GCN-O3-NEXT: FunctionPass Manager -; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis -; GCN-O3-NEXT: Machine Optimization Remark Emitter -; GCN-O3-NEXT: AMDGPU Assembly Printer -; GCN-O3-NEXT: Free MachineFunction +; GCN-O3-NEXT: CallGraph Construction +; GCN-O3-NEXT: Call Graph SCC Pass Manager +; GCN-O3-NEXT: DummyCGSCCPass +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Optimization Remark Emitter +; GCN-O3-NEXT: AMDGPU Assembly Printer +; GCN-O3-NEXT: Free MachineFunction define void @empty() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll index 34dcdaf29677e4..6c89f59fb1c83a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll @@ -9,6 +9,19 @@ @lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1 @lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16 +; GCN-LABEL: {{^}}f0: +; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3 +; GCN: ds_write_b8 [[NULL]], [[TREE]] +define void @f0() { +; OPT-LABEL: @f0( +; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1 +; OPT-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1 + ret void +} + ; GCN-LABEL: {{^}}k0: ; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 @@ -29,16 +42,3 @@ define amdgpu_kernel void @k0() { call void @f0() ret void } - -; GCN-LABEL: {{^}}f0: -; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0 -; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3 -; GCN: ds_write_b8 [[NULL]], [[TREE]] -define void @f0() { -; OPT-LABEL: @f0() { -; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1 -; OPT-NEXT: ret void -; - store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1 - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll index b398e86403ffb1..2d236a2b8852c6 100644 --- a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll +++ b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll @@ -24,6 +24,55 @@ store i32 0, ptr addrspace(3) @used_by_kernel } ; CHECK: ; LDSByteSize: 4 bytes +define void @nonkernel() { +; GFX9-LABEL: nonkernel: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-NEXT: ds_write_b32 v0, v0 offset:8 +; GFX9-NEXT: ds_write_b64 v0, v[0:1] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: nonkernel: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_mov_b32_e32 v1, v0 +; GFX10-NEXT: ds_write_b32 v0, v0 offset:8 +; GFX10-NEXT: ds_write_b64 v0, v[0:1] +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; G_GFX9-LABEL: nonkernel: +; G_GFX9: ; %bb.0: +; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; G_GFX9-NEXT: v_mov_b32_e32 v2, 0 +; G_GFX9-NEXT: v_mov_b32_e32 v3, 8 +; G_GFX9-NEXT: v_mov_b32_e32 v0, 0 +; G_GFX9-NEXT: v_mov_b32_e32 v1, 0 +; G_GFX9-NEXT: ds_write_b32 v3, v2 +; G_GFX9-NEXT: ds_write_b64 v2, v[0:1] +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: s_setpc_b64 s[30:31] +; +; G_GFX10-LABEL: nonkernel: +; G_GFX10: ; %bb.0: +; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; G_GFX10-NEXT: v_mov_b32_e32 v2, 0 +; G_GFX10-NEXT: v_mov_b32_e32 v3, 8 +; G_GFX10-NEXT: v_mov_b32_e32 v0, 0 +; G_GFX10-NEXT: v_mov_b32_e32 v1, 0 +; G_GFX10-NEXT: ds_write_b32 v3, v2 +; G_GFX10-NEXT: ds_write_b64 v2, v[0:1] +; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX10-NEXT: s_setpc_b64 s[30:31] + store i32 0, ptr addrspace(3) @used_by_both + store double 0.0, ptr addrspace(3) @used_by_function + ret void +} + ; Needs to allocate both variables, store to used_by_both is at sizeof(double) define amdgpu_kernel void @withcall() { ; GFX9-LABEL: withcall: @@ -140,54 +189,5 @@ define amdgpu_kernel void @nocall_false_sharing() { ; CHECK: ; LDSByteSize: 4 bytes -define void @nonkernel() { -; GFX9-LABEL: nonkernel: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-NEXT: ds_write_b32 v0, v0 offset:8 -; GFX9-NEXT: ds_write_b64 v0, v[0:1] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: nonkernel: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-NEXT: ds_write_b32 v0, v0 offset:8 -; GFX10-NEXT: ds_write_b64 v0, v[0:1] -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; G_GFX9-LABEL: nonkernel: -; G_GFX9: ; %bb.0: -; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v2, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v3, 8 -; G_GFX9-NEXT: v_mov_b32_e32 v0, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, 0 -; G_GFX9-NEXT: ds_write_b32 v3, v2 -; G_GFX9-NEXT: ds_write_b64 v2, v[0:1] -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_setpc_b64 s[30:31] -; -; G_GFX10-LABEL: nonkernel: -; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; G_GFX10-NEXT: v_mov_b32_e32 v2, 0 -; G_GFX10-NEXT: v_mov_b32_e32 v3, 8 -; G_GFX10-NEXT: v_mov_b32_e32 v0, 0 -; G_GFX10-NEXT: v_mov_b32_e32 v1, 0 -; G_GFX10-NEXT: ds_write_b32 v3, v2 -; G_GFX10-NEXT: ds_write_b64 v2, v[0:1] -; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_setpc_b64 s[30:31] - store i32 0, ptr addrspace(3) @used_by_both - store double 0.0, ptr addrspace(3) @used_by_function - ret void -} - !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll index 503b3348757971..f1c5e9e469fe88 100644 --- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll @@ -7,7 +7,7 @@ @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 -; GCN-LABEL: unreachable: +; GCN-NOT: unreachable: ; Function info: ; codeLenInByte = 4 define internal fastcc void @unreachable() { diff --git a/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll b/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll new file mode 100644 index 00000000000000..a0023b70f28b09 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll @@ -0,0 +1,89 @@ +; RUN: llc -O0 -march=amdgcn -mcpu=gfx90a < %s | FileCheck %s +; REQUIRES: asserts + +@alias = internal alias i32, i32* @aliased_internal_func +@alias_taken = internal alias i32, i32* @aliased_taken_func + +; CHECK-NOT: internal_func +define internal i32 @internal_func() { + ret i32 0 +} + +; CHECK-NOT: private_func +define private i32 @private_func() { + ret i32 0 +} + +; CHECK-NOT: aliased_internal_func +define internal i32 @aliased_internal_func() { + ret i32 0 +} + +; CHECK-LABEL: take_alias_addr +; CHECK: Function info: +; CHECK-NEXT: codeLenInByte = 60 +; CHECK-NEXT: NumSgprs: 37 +; CHECK-NEXT: NumVgprs: 1 +; CHECK-NEXT: NumAgprs: 0 +; CHECK-NEXT: TotalNumVgprs: 1 +; CHECK-NEXT: ScratchSize: 16 +; CHECK-NEXT: MemoryBound: 0 +define void @take_alias_addr() { + %addr_loc = alloca ptr, addrspace(5) + store ptr @alias_taken, ptr addrspace(5) %addr_loc + ret void +} + +; CHECK: aliased_taken_func +; CHECK: Function info: +; CHECK-NEXT: codeLenInByte = 12 +; CHECK-NEXT: NumSgprs: 36 +; CHECK-NEXT: NumVgprs: 1 +; CHECK-NEXT: NumAgprs: 0 +; CHECK-NEXT: TotalNumVgprs: 1 +; CHECK-NEXT: ScratchSize: 0 +; CHECK-NEXT: MemoryBound: 0 +define internal i32 @aliased_taken_func() { + ret i32 0 +} + +; CHECK-LABEL: addr_taken +; CHECK: Function info: +; CHECK-NEXT: codeLenInByte = 12 +; CHECK-NEXT: NumSgprs: 36 +; CHECK-NEXT: NumVgprs: 1 +; CHECK-NEXT: NumAgprs: 0 +; CHECK-NEXT: TotalNumVgprs: 1 +; CHECK-NEXT: ScratchSize: 0 +; CHECK-NEXT: MemoryBound: 0 +define internal i32 @addr_taken() { + ret i32 0 +} + +; CHECK-LABEL: non_local +; CHECK: Function info: +; CHECK-NEXT: codeLenInByte = 12 +; CHECK-NEXT: NumSgprs: 36 +; CHECK-NEXT: NumVgprs: 1 +; CHECK-NEXT: NumAgprs: 0 +; CHECK-NEXT: TotalNumVgprs: 1 +; CHECK-NEXT: ScratchSize: 0 +; CHECK-NEXT: MemoryBound: 0 +define i32 @non_local() { + ret i32 0 +} + +; CHECK-LABEL: take_addr +; CHECK: Function info: +; CHECK-NEXT: codeLenInByte = 60 +; CHECK-NEXT: NumSgprs: 37 +; CHECK-NEXT: NumVgprs: 1 +; CHECK-NEXT: NumAgprs: 0 +; CHECK-NEXT: TotalNumVgprs: 1 +; CHECK-NEXT: ScratchSize: 16 +; CHECK-NEXT: MemoryBound: 0 +define void @take_addr() { + %addr_loc = alloca ptr, addrspace(5) + store ptr @addr_taken, ptr addrspace(5) %addr_loc + ret void +} diff --git a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals-dwo.ll b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals-dwo.ll index 8019d89be86f19..4438435501e83e 100644 --- a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals-dwo.ll +++ b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals-dwo.ll @@ -39,7 +39,7 @@ ; DWARF-DWO-NEXT: DW_AT_external [DW_FORM_flag_present] ; DWARF-DWO-NEXT: DW_AT_decl_file [DW_FORM_data1] ; DWARF-DWO-NEXT: DW_AT_decl_line [DW_FORM_data1] -; DWARF-DWO-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_lit0, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_constu 0x3, DW_OP_LLVM_user DW_OP_LLVM_form_aspace_address) +; DWARF-DWO-NEXT: DW_AT_location [DW_FORM_exprloc] () ; DWARF-DWO: DW_TAG_variable ; DWARF-DWO-NEXT: DW_AT_name [{{DW_FORM_GNU_str_index|DW_FORM_strx1}}] ("FileVarDeviceConstant") diff --git a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals.ll b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals.ll index 48ea39277260e8..220d7f06d8c7f1 100644 --- a/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals.ll +++ b/llvm/test/DebugInfo/AMDGPU/heterogeneous-dwarf-globals.ll @@ -44,11 +44,11 @@ ; DWARF-NEXT: DW_AT_external [DW_FORM_flag_present] ; DWARF-NEXT: DW_AT_decl_file [DW_FORM_data1] ; DWARF-NEXT: DW_AT_decl_line [DW_FORM_data1] -; DWARF-ORIG-OPS-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_lit0, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_constu 0x3, DW_OP_LLVM_form_aspace_address) -; DWARF-USER-OPS-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_lit0, DW_OP_stack_value, DW_OP_deref_size 0x4, DW_OP_constu 0x3, DW_OP_LLVM_user DW_OP_LLVM_form_aspace_address) +; DWARF-ORIG-OPS-NEXT: DW_AT_location [DW_FORM_exprloc] () +; DWARF-USER-OPS-NEXT: DW_AT_location [DW_FORM_exprloc] () -; DWARF-ORIG-OPS: 0x00000053: DW_TAG_variable -; DWARF-USER-OPS: 0x00000055: DW_TAG_variable +; DWARF-ORIG-OPS: 0x0000004c: DW_TAG_variable +; DWARF-USER-OPS: 0x0000004d: DW_TAG_variable ; DWARF-NEXT: DW_AT_name [DW_FORM_strp] ("FileVarDeviceConstant") ; DWARF-NEXT: DW_AT_type [DW_FORM_ref4] ; DWARF-NEXT: DW_AT_external [DW_FORM_flag_present] @@ -61,10 +61,10 @@ ; RELOCS-OFF: RELOCATION RECORDS FOR ; 0x1e + 0x0d = 0x2b ; RELOCS: 000000000000002b R_AMDGPU_ABS64 FileVarDevice -; 0x53 + 0x0d = 0x60 -; RELOCS-OFF-USER-OPS: 0000000000000060 R_AMDGPU_ABS64 FileVarDeviceConstant -; 0x55 + 0x0d = 0x62 -; RELOCS-YES-USER-OPS: 0000000000000062 R_AMDGPU_ABS64 FileVarDeviceConstant +; 0x4c + 0x0d = 0x59 +; RELOCS-OFF-USER-OPS: 0000000000000059 R_AMDGPU_ABS64 FileVarDeviceConstant +; 0x4d + 0x0d = 0x5a +; RELOCS-YES-USER-OPS: 000000000000005a R_AMDGPU_ABS64 FileVarDeviceConstant target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa"