[MLIR][OpenMP] Lower only target related pragmas for GPU MLIR
Flang generates two MLIR files for target-related pragmas: one for
the host and one for the GPU (device). If a given source function
contains both host-only and target-related pragmas, we must ensure
that the host-only pragmas are not lowered for the GPU MLIR.
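As an illustration of the scenario, here is a minimal sketch of device-side MLIR for a function that mixes both kinds of pragmas (the function name and operations are illustrative, not taken from the patch); with this change, only the contents of omp.target are lowered when omp.is_gpu = true:

// Device (GPU) MLIR sketch: the function carries both a host-only pragma
// and a target region.
llvm.func @mixed_pragmas() {
  // Host-only pragma: skipped when lowering the GPU MLIR.
  omp.barrier
  // Target region: lowered for both the host and the GPU MLIR.
  omp.target {
    omp.terminator
  }
  llvm.return
}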
DominikAdamski committed Feb 22, 2024
1 parent db9e8eb commit 73466c5
Showing 4 changed files with 50 additions and 4 deletions.
@@ -127,6 +127,46 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
&funcEntryBlock, funcEntryBlock.getFirstInsertionPt());
}

static bool isOpAllowedToBeLowered(Operation *opInst,
llvm::OpenMPIRBuilder *ompBuilder) {
if (!opInst)
return false;
// The omp.target operation can be lowered for both host and device MLIR
if (isa<omp::TargetOp>(opInst))
return true;

// OpenMP operations inside omp.target can be lowered for host and device MLIR
if (opInst->getParentOfType<omp::TargetOp>())
return true;

// TODO: Add support for test case:
// omp.parallel { //host pragma
// omp.target { }
// }
bool hasTargetRegion =
opInst->walk([](omp::TargetOp) { return WalkResult::interrupt(); })
.wasInterrupted();
if (hasTargetRegion)
opInst->emitError("Target region inside other pragma is not yet supported");

// Check if the given OpenMP operation belongs to a function labelled with
// the omp declare target pragma
LLVM::LLVMFuncOp funcOp = opInst->getParentOfType<LLVM::LLVMFuncOp>();
omp::DeclareTargetDeviceType declareType = omp::DeclareTargetDeviceType::host;

if (!funcOp)
return false;
auto declareTargetOp =
dyn_cast<omp::DeclareTargetInterface>(funcOp.getOperation());
if (declareTargetOp && declareTargetOp.isDeclareTarget())
declareType = declareTargetOp.getDeclareTargetDeviceType();
if ((declareType == omp::DeclareTargetDeviceType::host) &&
ompBuilder->Config.isGPU()) {
return false;
}
return true;
}

/// Converts the given region that appears within an OpenMP dialect operation to
/// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the
/// region, and a branch from any block with an successor-less OpenMP terminator
@@ -3182,6 +3222,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(

llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

// Skip lowering of an OpenMP operation if its context is not appropriate
if (!isOpAllowedToBeLowered(op, ompBuilder))
return success();
return llvm::TypeSwitch<Operation *, LogicalResult>(op)
.Case([&](omp::BarrierOp) {
ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
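The test updates below follow from the new check: in a module with omp.is_gpu = true, a function that lacks an omp.declare_target attribute defaults to the host device type, so its OpenMP operations are now skipped; the worksharing-loop tests therefore mark their functions with device_type = (any). A minimal sketch of the distinction, using the attribute syntax from these tests (function names are illustrative):

module attributes {omp.is_gpu = true, omp.is_target_device = true} {
  // No omp.declare_target attribute: treated as host-only, so with this
  // patch the omp.barrier below is no longer lowered for the GPU MLIR.
  llvm.func @host_only_func() {
    omp.barrier
    llvm.return
  }
  // Declared target for any device: still lowered for the GPU MLIR.
  llvm.func @device_func() attributes {
      omp.declare_target = #omp.declaretarget<device_type = (any)>} {
    omp.barrier
    llvm.return
  }
}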
3 changes: 2 additions & 1 deletion mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -6,7 +6,8 @@
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes {
target_cpu = "gfx90a",
target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>
target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>,
omp.declare_target = #omp.declaretarget<device_type = (any)>
} {
omp.parallel {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
2 changes: 1 addition & 1 deletion mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -4,7 +4,7 @@
// for nested omp do loop with collapse clause inside omp target region

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) {
llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (any)> } {
%loop_ub = llvm.mlir.constant(99 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : index) : i32
6 changes: 4 additions & 2 deletions mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -4,7 +4,8 @@
// for nested omp do loop inside omp target region

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
llvm.func @target_wsloop(%arg0: !llvm.ptr ){
llvm.func @target_wsloop(%arg0: !llvm.ptr ) attributes {
omp.declare_target = #omp.declaretarget<device_type = (any)> } {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : i32) : i32
@@ -16,7 +17,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
llvm.return
}

llvm.func @target_empty_wsloop(){
llvm.func @target_empty_wsloop() attributes {
omp.declare_target = #omp.declaretarget<device_type = (any)> } {
%loop_ub = llvm.mlir.constant(9 : i32) : i32
%loop_lb = llvm.mlir.constant(0 : i32) : i32
%loop_step = llvm.mlir.constant(1 : i32) : i32
