From 73466c5de83beac4109c65c99b25654db8bb67fe Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 22 Feb 2024 03:53:40 -0600 Subject: [PATCH] [MLIR][OpenMP] Lower only target related pragmas for GPU MLIR Flang generates two MLIR files for target-related pragmas: the host MLIR and the GPU MLIR. If a given source function contains both host and target-related pragmas, we must ensure that we do not lower the host-related pragmas for the GPU MLIR. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 43 +++++++++++++++++++ .../LLVMIR/omptarget-parallel-wsloop.mlir | 3 +- .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/omptarget-wsloop.mlir | 6 ++- 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3500ddc93a1753..d4ec80d700c071 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -127,6 +127,46 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); } +static bool isOpAllowedToBeLowered(Operation *opInst, + llvm::OpenMPIRBuilder *ompBuilder) { + if (!opInst) + return false; + // omp.target operation can be lowered for host and device MLIR + if (isa(opInst)) + return true; + + // OpenMP operations inside omp.target can be lowered for host and device MLIR + if (opInst->getParentOfType()) + return true; + + // TODO: Add support for test case: + // omp.parallel { //host pragma + // omp.target { } + // } + bool hasTargetRegion = + opInst->walk([](omp::TargetOp) { return WalkResult::interrupt(); }) + .wasInterrupted(); + if (hasTargetRegion) + opInst->emitError("Target region inside other pragma is not yet supported"); + + // Check if given OpenMP operation belongs to function labelled with + // omp declare target pragma + LLVM::LLVMFuncOp 
funcOp = opInst->getParentOfType(); + omp::DeclareTargetDeviceType declareType = omp::DeclareTargetDeviceType::host; + + if (!funcOp) + return false; + auto declareTargetOp = + dyn_cast(funcOp.getOperation()); + if (declareTargetOp && declareTargetOp.isDeclareTarget()) + declareType = declareTargetOp.getDeclareTargetDeviceType(); + if ((declareType == omp::DeclareTargetDeviceType::host) && + ompBuilder->Config.isGPU()) { + return false; + } + return true; +} + /// Converts the given region that appears within an OpenMP dialect operation to /// LLVM IR, creating a branch from the `sourceBlock` to the entry block of the /// region, and a branch from any block with an successor-less OpenMP terminator @@ -3182,6 +3222,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + // Skip lowering of an OpenMP operation if its context is not appropriate + if (!isOpAllowedToBeLowered(op, ompBuilder)) + return success(); return llvm::TypeSwitch(op) .Case([&](omp::BarrierOp) { ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir index afbf5f22246309..1561b535250142 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir @@ -6,7 +6,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes { target_cpu = "gfx90a", - target_features = 
#llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]> + target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>, + omp.declare_target = #omp.declaretarget } { omp.parallel { %loop_ub = llvm.mlir.constant(9 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index 435aca32450c2f..4690cf43e84df2 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -4,7 +4,7 @@ // for nested omp do loop with collapse clause inside omp target region module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { - llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) { + llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget } { %loop_ub = llvm.mlir.constant(99 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir index 4cfb7d4f695143..a7b3f3542a4acd 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir @@ -4,7 +4,8 @@ // for nested omp do loop inside omp target region module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { - llvm.func @target_wsloop(%arg0: !llvm.ptr ){ + llvm.func @target_wsloop(%arg0: !llvm.ptr ) attributes { + omp.declare_target = #omp.declaretarget } { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 @@ -16,7 +17,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo llvm.return } - llvm.func @target_empty_wsloop(){ + llvm.func @target_empty_wsloop() attributes { + omp.declare_target = #omp.declaretarget } { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32