Skip to content

Commit

Permalink
[Clang] [OpenMP] Initialize IsSPMDMode for specialized kernels.
Browse files Browse the repository at this point in the history
Fixes SWDEV-477754: 513.soma fails after driver update. Without this
initialization, the attributor's liveness analysis deduces that parts of
the kernel are unreachable and can be removed.

Change-Id: Ie51084dda5ecd172eeb47b1024da0c920efaff69
  • Loading branch information
dhruvachak authored and ronlieb committed Aug 27, 2024
1 parent 6456a4f commit e548161
Show file tree
Hide file tree
Showing 20 changed files with 169 additions and 60 deletions.
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2596,6 +2596,11 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumBlocks(CodeGenFunction &CGF) {
CGM.getModule(), OMPRTL___kmpc_get_hardware_num_blocks));
}

llvm::Value *CGOpenMPRuntimeGPU::initSpecializedKernel(CodeGenFunction &CGF) {
return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_specialized_kernel_init));
}

std::pair<llvm::Value *, llvm::Value *>
CGOpenMPRuntimeGPU::getXteamRedFunctionPtrs(CodeGenFunction &CGF,
llvm::Type *RedVarType) {
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// Get the number of blocks on the GPU
llvm::Value *getGPUNumBlocks(CodeGenFunction &CGF);

/// Initialization for a specialized kernel.
llvm::Value *initSpecializedKernel(CodeGenFunction &CGF);

std::pair<llvm::Value *, llvm::Value *>
getXteamRedFunctionPtrs(CodeGenFunction &CGF, llvm::Type *RedVarType);

Expand Down
15 changes: 13 additions & 2 deletions clang/lib/CodeGen/CGStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,15 +319,18 @@ void CodeGenFunction::EmitNoLoopCode(const OMPExecutableDirective &D,
const ForStmt *CapturedForStmt,
SourceLocation Loc) {
assert(isa<OMPLoopDirective>(D) && "Unexpected directive");

const OMPLoopDirective &LD = cast<OMPLoopDirective>(D);
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());

// Initialize a specialized kernel.
RT.initSpecializedKernel(*this);

auto IVPair = EmitNoLoopIV(LD);
const VarDecl *IVDecl = IVPair.first;
Address IvAddr = IVPair.second;

// Generate myid = workgroup_id * workgroup_size + workitem_id
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());

// workitem_id
llvm::Value *GpuThreadId = RT.getGPUThreadID(*this);

Expand Down Expand Up @@ -385,6 +388,9 @@ void CodeGenFunction::EmitNoLoopCode(const OMPExecutableDirective &D,
void CodeGenFunction::EmitBigJumpLoopCode(const OMPExecutableDirective &D,
const ForStmt *CapturedForStmt,
SourceLocation Loc) {
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());
// Initialize a specialized kernel.
RT.initSpecializedKernel(*this);
EmitStmt(CapturedForStmt);
}

Expand All @@ -396,6 +402,11 @@ void CodeGenFunction::EmitXteamRedCode(const OMPExecutableDirective &D,
// generated
CGM.setCurrentXteamRedStmt(CapturedForStmt);

auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());

// Initialize a specialized kernel.
RT.initSpecializedKernel(*this);

EmitXteamLocalAggregator(CapturedForStmt);

// Now emit the modified loop. If there is a statement in the loop with a
Expand Down
1 change: 1 addition & 0 deletions clang/test/OpenMP/amdgcn_target_device_vla.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ int main() {
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down
1 change: 1 addition & 0 deletions clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void write_to_aligned_array(int *a, int N) {
// CHECK-AMD-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8
// CHECK-AMD-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down
2 changes: 2 additions & 0 deletions clang/test/OpenMP/big_jump_loop_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down Expand Up @@ -176,6 +177,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down
6 changes: 6 additions & 0 deletions clang/test/OpenMP/big_jump_loop_split_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down Expand Up @@ -196,6 +197,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down Expand Up @@ -289,6 +291,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down Expand Up @@ -382,6 +385,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down Expand Up @@ -475,6 +479,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
Expand Down Expand Up @@ -568,6 +573,7 @@ int main()
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: store i32 0, ptr [[K_ASCAST]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 2
Expand Down
22 changes: 18 additions & 4 deletions clang/test/OpenMP/fast_red_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -235,6 +236,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -339,6 +341,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -463,6 +466,7 @@ int main()
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP7:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP7]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -584,6 +588,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand All @@ -605,13 +610,13 @@ int main()
// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLB_MIN_ASCAST]], align 4
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLB_MAX_ASCAST]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]]
// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8
// CHECK-NEXT: store i8 [[FROMBOOL]], ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[CMP]] to i8
// CHECK-NEXT: store i8 [[STOREDV]], ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1
// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTUPPER_ASCAST]], align 4
// CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTMIN_LESS_MAX_ASCAST]], align 1
// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1
// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1
// CHECK-NEXT: br i1 [[LOADEDV]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTLB_MIN_ASCAST]], align 4
// CHECK-NEXT: br label [[COND_END:%.*]]
Expand Down Expand Up @@ -767,6 +772,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -868,6 +874,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM1_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -969,6 +976,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM3_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -1070,6 +1078,7 @@ int main()
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SUM2_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP5:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP5]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -1179,6 +1188,7 @@ int main()
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP7]], align 4
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -1293,6 +1303,7 @@ int main()
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP7]], align 4
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -1407,6 +1418,7 @@ int main()
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP7:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: store i64 0, ptr addrspace(5) [[TMP7]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -1522,6 +1534,7 @@ int main()
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[BINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CINT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP7:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-NEXT: store i64 0, ptr addrspace(5) [[TMP7]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down Expand Up @@ -1649,6 +1662,7 @@ int main()
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @__kmpc_specialized_kernel_init()
// CHECK-NEXT: [[TMP7:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: store double 0.000000e+00, ptr addrspace(5) [[TMP7]], align 8
// CHECK-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
Expand Down
Loading

0 comments on commit e548161

Please sign in to comment.