From e548161fb43188b1658a5a1c799e0c12a7831609 Mon Sep 17 00:00:00 2001 From: Dhruva Chakrabarti Date: Mon, 26 Aug 2024 18:55:45 -0500 Subject: [PATCH] [Clang] [OpenMP] Initialize IsSPMDMode for specialized kernels. Fixes SWDEV-477754: 513.soma fails after driver update. Without this initialization, the attributor's liveness analysis deduces that parts of the kernel are unreachable and can be removed. Change-Id: Ie51084dda5ecd172eeb47b1024da0c920efaff69 --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 5 + clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 3 + clang/lib/CodeGen/CGStmt.cpp | 15 ++- .../test/OpenMP/amdgcn_target_device_vla.cpp | 1 + .../amdgpu_target_with_aligned_attribute.c | 1 + clang/test/OpenMP/big_jump_loop_codegen.cpp | 2 + .../OpenMP/big_jump_loop_split_codegen.cpp | 6 ++ clang/test/OpenMP/fast_red_codegen.cpp | 22 +++- clang/test/OpenMP/no_loop_codegen.cpp | 8 ++ clang/test/OpenMP/no_loop_split_codegen.cpp | 8 ++ .../target_teams_generic_loop_codegen-2.cpp | 7 ++ ...s_generic_loop_codegen_as_parallel_for.cpp | 2 + ...get_teams_loop_codegen_as_parallel_for.cpp | 2 + clang/test/OpenMP/xteam_red_callee.cpp | 102 +++++++++--------- clang/test/OpenMP/xteam_red_codegen.cpp | 22 +++- clang/test/OpenMP/xteam_red_small_precision.c | 3 + clang/test/OpenMP/xteam_red_split_codegen.cpp | 12 +++ .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 + offload/DeviceRTL/include/Interface.h | 3 + offload/DeviceRTL/src/Kernel.cpp | 2 + 20 files changed, 169 insertions(+), 60 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index b0d4df1a15f6f0..6500b380751db5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2596,6 +2596,11 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumBlocks(CodeGenFunction &CGF) { CGM.getModule(), OMPRTL___kmpc_get_hardware_num_blocks)); } +llvm::Value *CGOpenMPRuntimeGPU::initSpecializedKernel(CodeGenFunction &CGF) { + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_specialized_kernel_init)); +} + std::pair CGOpenMPRuntimeGPU::getXteamRedFunctionPtrs(CodeGenFunction &CGF, llvm::Type *RedVarType) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index b06b5f3bd16db5..d31ae74a3af50f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -165,6 +165,9 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { /// Get the number of blocks on the GPU llvm::Value *getGPUNumBlocks(CodeGenFunction &CGF); + /// Initialization for a specialized kernel. + llvm::Value *initSpecializedKernel(CodeGenFunction &CGF); + std::pair getXteamRedFunctionPtrs(CodeGenFunction &CGF, llvm::Type *RedVarType); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index b12257927cbd2e..f70a2bef3a0552 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -319,15 +319,18 @@ void CodeGenFunction::EmitNoLoopCode(const OMPExecutableDirective &D, const ForStmt *CapturedForStmt, SourceLocation Loc) { assert(isa(D) && "Unexpected directive"); + const OMPLoopDirective &LD = cast(D); + auto &RT = static_cast(CGM.getOpenMPRuntime()); + + // Initialize a specialized kernel. + RT.initSpecializedKernel(*this); auto IVPair = EmitNoLoopIV(LD); const VarDecl *IVDecl = IVPair.first; Address IvAddr = IVPair.second; // Generate myid = workgroup_id * workgroup_size + workitem_id - auto &RT = static_cast(CGM.getOpenMPRuntime()); - // workitem_id llvm::Value *GpuThreadId = RT.getGPUThreadID(*this); @@ -385,6 +388,9 @@ void CodeGenFunction::EmitNoLoopCode(const OMPExecutableDirective &D, void CodeGenFunction::EmitBigJumpLoopCode(const OMPExecutableDirective &D, const ForStmt *CapturedForStmt, SourceLocation Loc) { + auto &RT = static_cast(CGM.getOpenMPRuntime()); + // Initialize a specialized kernel. + RT.initSpecializedKernel(*this); EmitStmt(CapturedForStmt); } @@ -396,6 +402,11 @@ void CodeGenFunction::EmitXteamRedCode(const OMPExecutableDirective &D, // generated CGM.setCurrentXteamRedStmt(CapturedForStmt); + auto &RT = static_cast(CGM.getOpenMPRuntime()); + + // Initialize a specialized kernel. + RT.initSpecializedKernel(*this); + EmitXteamLocalAggregator(CapturedForStmt); // Now emit the modified loop. If there is a statement in the loop with a diff --git a/clang/test/OpenMP/amdgcn_target_device_vla.cpp b/clang/test/OpenMP/amdgcn_target_device_vla.cpp index 53ad3019646bec..6fb7153f72b3c9 100644 --- a/clang/test/OpenMP/amdgcn_target_device_vla.cpp +++ b/clang/test/OpenMP/amdgcn_target_device_vla.cpp @@ -215,6 +215,7 @@ int main() { // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c index cb860adcb70c00..de886d887534a8 100644 --- a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c +++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c @@ -42,6 +42,7 @@ void write_to_aligned_array(int *a, int N) { // CHECK-AMD-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 // CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 // CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 diff --git a/clang/test/OpenMP/big_jump_loop_codegen.cpp b/clang/test/OpenMP/big_jump_loop_codegen.cpp index a100f796096ddd..9066b5462167ee 100644 --- a/clang/test/OpenMP/big_jump_loop_codegen.cpp +++ b/clang/test/OpenMP/big_jump_loop_codegen.cpp @@ -72,6 +72,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -176,6 +177,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 diff --git a/clang/test/OpenMP/big_jump_loop_split_codegen.cpp b/clang/test/OpenMP/big_jump_loop_split_codegen.cpp index 220ca42ff5c46c..5b18f96e01743b 100644 --- a/clang/test/OpenMP/big_jump_loop_split_codegen.cpp +++ b/clang/test/OpenMP/big_jump_loop_split_codegen.cpp @@ -103,6 +103,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -196,6 +197,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -289,6 +291,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -382,6 +385,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -475,6 +479,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -568,6 +573,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2 diff --git a/clang/test/OpenMP/fast_red_codegen.cpp b/clang/test/OpenMP/fast_red_codegen.cpp index 9287c1e499d13e..93710ba09a57eb 100644 --- a/clang/test/OpenMP/fast_red_codegen.cpp +++ b/clang/test/OpenMP/fast_red_codegen.cpp @@ -134,6 +134,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -235,6 +236,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -339,6 +341,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -463,6 +466,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -584,6 +588,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -605,13 +610,13 @@ int main() // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLB_MIN_ASCAST]], align 4 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLB_MAX_ASCAST]], align 4 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1 +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMP]] to i8 +// CHECK-NEXT: store i8 [[STOREDV]], ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTUPPER_ASCAST]], align 4 // CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1 -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTLB_MIN_ASCAST]], align 4 // CHECK-NEXT: br label [[COND_END:%.*]] @@ -767,6 +772,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -868,6 +874,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -969,6 +976,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM3_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1070,6 +1078,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1179,6 +1188,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP7]], align 4 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1293,6 +1303,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP7]], align 4 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1407,6 +1418,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: store i64 0, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1522,6 +1534,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: store i64 0, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1649,6 +1662,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 diff --git a/clang/test/OpenMP/no_loop_codegen.cpp b/clang/test/OpenMP/no_loop_codegen.cpp index 087f836ddb4b65..f3858c6662d9be 100644 --- a/clang/test/OpenMP/no_loop_codegen.cpp +++ b/clang/test/OpenMP/no_loop_codegen.cpp @@ -105,6 +105,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -186,6 +187,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -268,6 +270,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -349,6 +352,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -433,6 +437,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2 @@ -517,6 +522,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -598,6 +604,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -697,6 +704,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 diff --git a/clang/test/OpenMP/no_loop_split_codegen.cpp b/clang/test/OpenMP/no_loop_split_codegen.cpp index 6f3cacdcb8766e..343c7e82cd89a5 100644 --- a/clang/test/OpenMP/no_loop_split_codegen.cpp +++ b/clang/test/OpenMP/no_loop_split_codegen.cpp @@ -125,6 +125,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -207,6 +208,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -289,6 +291,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -371,6 +374,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -453,6 +457,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -1154,6 +1159,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -1236,6 +1242,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -1318,6 +1325,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen-2.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen-2.cpp index 56efd6f6ac1987..c1de6bcbde0b11 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen-2.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen-2.cpp @@ -84,6 +84,7 @@ int main() // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 // CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 @@ -142,6 +143,7 @@ int main() // CHECK-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 // CHECK-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 @@ -214,6 +216,7 @@ int main() // CHECK-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 @@ -287,6 +290,7 @@ int main() // CHECK-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 @@ -357,6 +361,7 @@ int main() // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 @@ -419,6 +424,7 @@ int main() // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 @@ -495,6 +501,7 @@ int main() // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp index 4c99c1682e0139..3ba0c7bf5ba36b 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp @@ -108,6 +108,7 @@ int main() // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: call void @__kmpc_specialized_kernel_init() // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // IR-GPU-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -205,6 +206,7 @@ int main() // IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: call void @__kmpc_specialized_kernel_init() // IR-GPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // IR-GPU-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 diff --git a/clang/test/OpenMP/target_teams_loop_codegen_as_parallel_for.cpp b/clang/test/OpenMP/target_teams_loop_codegen_as_parallel_for.cpp index b8fb6bb08c0621..d057be08f460f9 100644 --- a/clang/test/OpenMP/target_teams_loop_codegen_as_parallel_for.cpp +++ b/clang/test/OpenMP/target_teams_loop_codegen_as_parallel_for.cpp @@ -60,6 +60,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 @@ -157,6 +158,7 @@ int main() // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 diff --git a/clang/test/OpenMP/xteam_red_callee.cpp b/clang/test/OpenMP/xteam_red_callee.cpp index d5ad7259c4cf3e..dcea9b3c64a4bd 100644 --- a/clang/test/OpenMP/xteam_red_callee.cpp +++ b/clang/test/OpenMP/xteam_red_callee.cpp @@ -727,9 +727,9 @@ int main() // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK-NEXT: store double [[TMP9]], ptr [[SUM]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8 // CHECK-NEXT: ret void // // @@ -751,10 +751,10 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l34_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l34_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR2]] // CHECK-NEXT: ret void // // @@ -776,9 +776,9 @@ int main() // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[SUM]], align 8 -// CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8 // CHECK-NEXT: ret void // // @@ -800,10 +800,10 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l34_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l34_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] // CHECK-NEXT: ret void // // @@ -846,6 +846,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1587,10 +1588,10 @@ int main() // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK-NEXT: store double [[TMP9]], ptr [[SUM]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8 // CHECK-NEXT: ret void // // @@ -1611,11 +1612,11 @@ int main() // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l42_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l42_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR2]] // CHECK-NEXT: ret void // // @@ -1636,10 +1637,10 @@ int main() // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[SUM]], align 8 -// CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8 // CHECK-NEXT: ret void // // @@ -1660,11 +1661,11 @@ int main() // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l42_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l42_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] // CHECK-NEXT: ret void // // @@ -2315,10 +2316,10 @@ int main() // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK-NEXT: store double [[TMP9]], ptr [[SUM]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8 // CHECK-NEXT: ret void // // @@ -2339,11 +2340,11 @@ int main() // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR2]] // CHECK-NEXT: ret void // // @@ -2364,10 +2365,10 @@ int main() // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[SUM]], align 8 -// CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]] +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8 // CHECK-NEXT: ret void // // @@ -2388,11 +2389,11 @@ int main() // CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[SUM]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l46_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] // CHECK-NEXT: ret void // // @@ -2435,6 +2436,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM5_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 diff --git a/clang/test/OpenMP/xteam_red_codegen.cpp b/clang/test/OpenMP/xteam_red_codegen.cpp index 22e09c5335770e..5231c96ff3553f 100644 --- a/clang/test/OpenMP/xteam_red_codegen.cpp +++ b/clang/test/OpenMP/xteam_red_codegen.cpp @@ -134,6 +134,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -235,6 +236,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -339,6 +341,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -463,6 +466,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -584,6 +588,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -605,13 +610,13 @@ int main() // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLB_MIN_ASCAST]], align 4 // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLB_MAX_ASCAST]], align 4 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1 +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMP]] to i8 +// CHECK-NEXT: store i8 [[STOREDV]], ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTUPPER_ASCAST]], align 4 // CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1 -// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1 +// CHECK-NEXT: br i1 [[LOADEDV]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTLB_MIN_ASCAST]], align 4 // CHECK-NEXT: br label [[COND_END:%.*]] @@ -767,6 +772,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -868,6 +874,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -969,6 +976,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM3_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1070,6 +1078,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1179,6 +1188,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP7]], align 4 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1293,6 +1303,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP7]], align 4 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1407,6 +1418,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: store i64 0, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1522,6 +1534,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-NEXT: store i64 0, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1649,6 +1662,7 @@ int main() // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP7:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP7]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 diff --git a/clang/test/OpenMP/xteam_red_small_precision.c b/clang/test/OpenMP/xteam_red_small_precision.c index 101e273db51fc0..d2c577482b98d6 100644 --- a/clang/test/OpenMP/xteam_red_small_precision.c +++ b/clang/test/OpenMP/xteam_red_small_precision.c @@ -74,6 +74,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca half, align 2, addrspace(5) // CHECK-NEXT: store half 0xH0000, ptr addrspace(5) [[TMP5]], align 2 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -175,6 +176,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca bfloat, align 2, addrspace(5) // CHECK-NEXT: store bfloat 0xR0000, ptr addrspace(5) [[TMP5]], align 2 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -277,6 +279,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM3_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca i16, align 2, addrspace(5) // CHECK-NEXT: store i16 0, ptr addrspace(5) [[TMP5]], align 2 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 diff --git a/clang/test/OpenMP/xteam_red_split_codegen.cpp b/clang/test/OpenMP/xteam_red_split_codegen.cpp index ad090bf7e23c8a..d8c9888c784c56 100644 --- a/clang/test/OpenMP/xteam_red_split_codegen.cpp +++ b/clang/test/OpenMP/xteam_red_split_codegen.cpp @@ -139,6 +139,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 @@ -240,6 +241,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 @@ -341,6 +343,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 @@ -442,6 +445,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 @@ -543,6 +547,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 @@ -644,6 +649,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4 @@ -745,6 +751,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -846,6 +853,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -947,6 +955,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1048,6 +1057,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1149,6 +1159,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 @@ -1251,6 +1262,7 @@ int main() // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @__kmpc_specialized_kernel_init() // CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5) // CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8 // CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 2c85706798d2ec..65b5f628c96b3f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -481,6 +481,7 @@ __OMP_RTL(omp_get_default_device, false, Int32,) /// OpenMP Device runtime functions __OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr, KernelLaunchEnvironmentPtr) __OMP_RTL(__kmpc_target_deinit, false, Void,) +__OMP_RTL(__kmpc_specialized_kernel_init, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) @@ -1222,6 +1223,8 @@ __OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt, ParamAttrs(AttributeSet())) __OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_specialized_kernel_init, AttributeSet(), AttributeSet(), + ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_parallel_spmd, AlwaysInlineAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h index b3fc24147a8d78..7a66435ab0352f 100644 --- a/offload/DeviceRTL/include/Interface.h +++ b/offload/DeviceRTL/include/Interface.h @@ -227,6 +227,9 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment, void __kmpc_target_deinit(); +// Initializer for a specialized kernel. No finalizer is provided currently. +void __kmpc_specialized_kernel_init(); + ///} /// Reduction diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp index f49884f9c07597..98d861ffdc2fc2 100644 --- a/offload/DeviceRTL/src/Kernel.cpp +++ b/offload/DeviceRTL/src/Kernel.cpp @@ -153,6 +153,8 @@ void __kmpc_target_deinit() { } } +void __kmpc_specialized_kernel_init() { mapping::init(/*IsSPMD=*/true); } + #ifndef FORTRAN_NO_LONGER_NEEDS int32_t __kmpc_target_init_v1(int64_t *, int8_t Mode, int8_t UseGenericStateMachine,