From 3abba41a493cc1e6778b69f1cb25e1a71407d34d Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 6 Aug 2024 12:45:27 +0100 Subject: [PATCH 1/3] [MLIR][OpenMP] Remove omp.parallel from loop wrapper operations This patch removes the `LoopWrapperInterface` from `omp.parallel` and updates the semantics of the interface to make loop wrapper restrictions mandatory to operations that have it, rather than a role they might optionally take. MLIR operation verifiers are updated to expect the "hoisted omp.parallel" representation for `distribute parallel do`, to be later implemented in place of a loop wrapper `omp.parallel`. --- .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 4 +- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 1 - .../Dialect/OpenMP/OpenMPOpsInterfaces.td | 26 +++--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 56 +++++-------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 +- mlir/test/Dialect/OpenMP/invalid.mlir | 79 +++++-------------- mlir/test/Dialect/OpenMP/ops.mlir | 9 ++- 7 files changed, 62 insertions(+), 116 deletions(-) diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index ddb5801f3cbc0e..0ae78dc5da07af 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -235,9 +235,7 @@ void DataSharingProcessor::insertBarrier() { void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::omp::LoopNestOp loopOp; if (auto wrapper = mlir::dyn_cast(op)) - loopOp = wrapper.isWrapper() - ? mlir::cast(wrapper.getWrappedLoop()) - : nullptr; + loopOp = mlir::cast(wrapper.getWrappedLoop()); bool cmpCreated = false; mlir::OpBuilder::InsertionGuard guard(firOpBuilder); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index d05543f8fbd929..9fbb122170e798 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -128,7 +128,6 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]> def ParallelOp : OpenMP_Op<"parallel", traits = [ AttrSizedOperandSegments, AutomaticAllocationScope, - DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, RecursiveMemoryEffects ], clauses = [ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td index 2d1de37239c82a..ace671e04d7876 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -71,10 +71,10 @@ def ReductionClauseInterface : OpInterface<"ReductionClauseInterface"> { def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { let description = [{ - OpenMP operations that can wrap a single loop nest. When taking a wrapper - role, these operations must only contain a single region with a single block - in which there's a single operation and a terminator. That nested operation - must be another loop wrapper or an `omp.loop_nest`. + OpenMP operations that wrap a single loop nest. They must only contain a + single region with a single block in which there's a single operation and a + terminator. That nested operation must be another loop wrapper or an + `omp.loop_nest`. }]; let cppNamespace = "::mlir::omp"; @@ -82,13 +82,13 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { let methods = [ InterfaceMethod< /*description=*/[{ - Tell whether the operation could be taking the role of a loop wrapper. 
- That is, it has a single region with a single block in which there are - two operations: another wrapper (also taking a loop wrapper role) or + Check whether the operation is a valid loop wrapper. That is, it has a + single region with a single block in which there are two operations: + another loop wrapper (also taking a loop wrapper role) or `omp.loop_nest` operation and a terminator. }], /*retTy=*/"bool", - /*methodName=*/"isWrapper", + /*methodName=*/"isValidWrapper", (ins ), [{}], [{ if ($_op->getNumRegions() != 1) return false; @@ -107,7 +107,7 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { return false; if (auto wrapper = ::llvm::dyn_cast(firstOp)) - return wrapper.isWrapper(); + return wrapper.isValidWrapper(); return ::llvm::isa(firstOp); }] @@ -115,12 +115,12 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { InterfaceMethod< /*description=*/[{ If there is another loop wrapper immediately nested inside, return that - operation. Assumes this operation is taking a loop wrapper role. + operation. Assumes this operation is a valid loop wrapper. }], /*retTy=*/"::mlir::omp::LoopWrapperInterface", /*methodName=*/"getNestedWrapper", (ins), [{}], [{ - assert($_op.isWrapper() && "Unexpected non-wrapper op"); + assert($_op.isValidWrapper() && "Unexpected non-wrapper op"); Operation *nested = &*$_op->getRegion(0).op_begin(); return ::llvm::dyn_cast(nested); }] @@ -128,12 +128,12 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { InterfaceMethod< /*description=*/[{ Return the loop nest nested directly or indirectly inside of this loop - wrapper. Assumes this operation is taking a loop wrapper role. + wrapper. Assumes this operation is a valid loop wrapper. }], /*retTy=*/"::mlir::Operation *", /*methodName=*/"getWrappedLoop", (ins), [{}], [{ - assert($_op.isWrapper() && "Unexpected non-wrapper op"); + assert($_op.isValidWrapper() && "Unexpected non-wrapper op"); if (LoopWrapperInterface nested = $_op.getNestedWrapper()) return nested.getWrappedLoop(); return &*$_op->getRegion(0).op_begin(); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 5f6007561967e4..82efe62525ce12 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1604,15 +1604,15 @@ bool TargetOp::isTargetSPMDLoop() { if (!isa_and_present(workshareOp)) return false; - Operation *parallelOp = workshareOp->getParentOp(); - if (!isa_and_present(parallelOp)) + Operation *distributeOp = workshareOp->getParentOp(); + if (!isa_and_present(distributeOp)) return false; - Operation *distributeOp = parallelOp->getParentOp(); - if (!isa_and_present(distributeOp)) + Operation *parallelOp = distributeOp->getParentOp(); + if (!isa_and_present(parallelOp)) return false; - Operation *teamsOp = distributeOp->getParentOp(); + Operation *teamsOp = parallelOp->getParentOp(); if (!isa_and_present(teamsOp)) return false; @@ -1690,22 +1690,6 @@ static LogicalResult verifyPrivateVarList(OpType &op) { } LogicalResult ParallelOp::verify() { - // Check that it is a valid loop wrapper if it's taking that role. - if (isa((*this)->getParentOp())) { - if (!isWrapper()) - return emitOpError() << "must take a loop wrapper role if nested inside " - "of 'omp.distribute'"; - - if (LoopWrapperInterface nested = getNestedWrapper()) { - // Check for the allowed leaf constructs that may appear in a composite - // construct directly after PARALLEL. 
- if (!isa(nested)) - return emitError() << "only supported nested wrapper is 'omp.wsloop'"; - } else { - return emitOpError() << "must not wrap an 'omp.loop_nest' directly"; - } - } - if (getAllocateVars().size() != getAllocatorVars().size()) return emitError( "expected equal sizes for allocate and allocator variables"); @@ -1894,8 +1878,8 @@ void WsloopOp::build(OpBuilder &builder, OperationState &state, } LogicalResult WsloopOp::verify() { - if (!isWrapper()) - return emitOpError() << "must be a loop wrapper"; + if (!isValidWrapper()) + return emitOpError() << "must be a valid loop wrapper"; if (LoopWrapperInterface nested = getNestedWrapper()) { // Check for the allowed leaf constructs that may appear in a composite @@ -1939,8 +1923,8 @@ LogicalResult SimdOp::verify() { if (verifyNontemporalClause(*this, getNontemporalVars()).failed()) return failure(); - if (!isWrapper()) - return emitOpError() << "must be a loop wrapper"; + if (!isValidWrapper()) + return emitOpError() << "must be a valid loop wrapper"; if (getNestedWrapper()) return emitOpError() << "must wrap an 'omp.loop_nest' directly"; @@ -1970,15 +1954,19 @@ LogicalResult DistributeOp::verify() { return emitError( "expected equal sizes for allocate and allocator variables"); - if (!isWrapper()) - return emitOpError() << "must be a loop wrapper"; + if (!isValidWrapper()) + return emitOpError() << "must be a valid loop wrapper"; if (LoopWrapperInterface nested = getNestedWrapper()) { // Check for the allowed leaf constructs that may appear in a composite // construct directly after DISTRIBUTE. - if (!isa(nested)) - return emitError() << "only supported nested wrappers are 'omp.parallel' " - "and 'omp.simd'"; + if (isa(nested)) { + if (!llvm::dyn_cast_if_present((*this)->getParentOp())) + return emitError() << "an 'omp.wsloop' nested wrapper is only allowed " + "when 'omp.parallel' is the direct parent"; + } else if (!isa(nested)) + return emitError() << "only supported nested wrappers are 'omp.simd' and " + "'omp.wsloop'"; } return success(); @@ -2176,8 +2164,8 @@ LogicalResult TaskloopOp::verify() { "may not appear on the same taskloop directive"); } - if (!isWrapper()) - return emitOpError() << "must be a loop wrapper"; + if (!isValidWrapper()) + return emitOpError() << "must be a valid loop wrapper"; if (LoopWrapperInterface nested = getNestedWrapper()) { // Check for the allowed leaf constructs that may appear in a composite @@ -2269,7 +2257,7 @@ LogicalResult LoopNestOp::verify() { auto wrapper = llvm::dyn_cast_if_present((*this)->getParentOp()); - if (!wrapper || !wrapper.isWrapper()) + if (!wrapper || !wrapper.isValidWrapper()) return emitOpError() << "expects parent op to be a valid loop wrapper"; return success(); @@ -2280,8 +2268,6 @@ void LoopNestOp::gatherWrappers( Operation *parent = (*this)->getParentOp(); while (auto wrapper = llvm::dyn_cast_if_present(parent)) { - if (!wrapper.isWrapper()) - break; wrappers.push_back(wrapper); parent = parent->getParentOp(); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 8c72411b7b2e17..a5b289bee84b19 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3190,8 +3190,7 @@ static LogicalResult convertOmpDistribute( llvm::OpenMPIRBuilder::InsertPointTy *redAllocaIP, SmallVector &reductionInfos) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 
- // FIXME: This ignores any other nested wrappers (e.g. omp.parallel + - // omp.wsloop, omp.simd). + // FIXME: This ignores any other nested wrappers (e.g. omp.wsloop, omp.simd). auto distributeOp = cast(opInst); auto loopOp = cast(distributeOp.getWrappedLoop()); diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 3bed809bb6dc0d..856415655490f8 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -10,58 +10,6 @@ func.func @unknown_clause() { // ----- -func.func @not_wrapper() { - // expected-error@+1 {{op must be a loop wrapper}} - omp.distribute { - omp.parallel { - %0 = arith.constant 0 : i32 - omp.terminator - } - omp.terminator - } - - return -} - -// ----- - -func.func @invalid_nested_wrapper(%lb : index, %ub : index, %step : index) { - omp.distribute { - // expected-error@+1 {{only supported nested wrapper is 'omp.wsloop'}} - omp.parallel { - omp.simd { - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } - omp.terminator - } - omp.terminator - } - omp.terminator - } - - return -} - -// ----- - -func.func @no_nested_wrapper(%lb : index, %ub : index, %step : index) { - omp.distribute { - // expected-error@+1 {{op must not wrap an 'omp.loop_nest' directly}} - omp.parallel { - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { - omp.yield - } - omp.terminator - } - omp.terminator - } - - return -} - -// ----- - func.func @if_once(%n : i1) { // expected-error@+1 {{`if` clause can appear at most once in the expansion of the oilist directive}} omp.parallel if(%n) if(%n) { @@ -188,7 +136,7 @@ func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // ----- func.func @no_wrapper(%lb : index, %ub : index, %step : index) { - // expected-error @below {{op must be a loop wrapper}} + // expected-error @below {{op must be a valid loop wrapper}} omp.wsloop { %0 = arith.constant 0 : i32 omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { @@ -374,7 +322,7 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step : // ----- func.func @omp_simd() -> () { - // expected-error @below {{op must be a loop wrapper}} + // expected-error @below {{op must be a valid loop wrapper}} omp.simd { omp.terminator } @@ -1939,7 +1887,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { // ----- func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { - // expected-error @below {{op must be a loop wrapper}} + // expected-error @below {{op must be a valid loop wrapper}} omp.taskloop { %0 = arith.constant 0 : i32 omp.terminator @@ -2148,7 +2096,7 @@ func.func @omp_distribute_allocate(%data_var : memref) -> () { // ----- func.func @omp_distribute_wrapper() -> () { - // expected-error @below {{op must be a loop wrapper}} + // expected-error @below {{op must be a valid loop wrapper}} omp.distribute { %0 = arith.constant 0 : i32 "omp.terminator"() : () -> () @@ -2157,8 +2105,8 @@ func.func @omp_distribute_wrapper() -> () { // ----- -func.func @omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) -> () { - // expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simd'}} +func.func @omp_distribute_nested_wrapper1(%lb: index, %ub: index, %step: index) -> () { + // expected-error @below {{an 'omp.wsloop' nested wrapper is only allowed when 'omp.parallel' is the direct parent}} omp.distribute { "omp.wsloop"() ({ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { @@ -2172,6 +2120,21 @@ func.func 
@omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) - // ----- +func.func @omp_distribute_nested_wrapper2(%lb: index, %ub: index, %step: index) -> () { + // expected-error @below {{only supported nested wrappers are 'omp.simd' and 'omp.wsloop'}} + omp.distribute { + "omp.taskloop"() ({ + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { + "omp.yield"() : () -> () + } + "omp.terminator"() : () -> () + }) : () -> () + "omp.terminator"() : () -> () + } +} + +// ----- + func.func @omp_distribute_order() -> () { // expected-error @below {{invalid clause value: 'default'}} omp.distribute order(default) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index e49731c0d68301..92e42d31e90333 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -99,10 +99,11 @@ func.func @omp_parallel(%data_var : memref, %if_cond : i1, %num_threads : i omp.terminator }) {operandSegmentSizes = array} : (memref, memref) -> () - // CHECK: omp.distribute - omp.distribute { - // CHECK-NEXT: omp.parallel - omp.parallel { + // CHECK: omp.parallel + omp.parallel { + // CHECK-NOT: omp.terminator + // CHECK: omp.distribute + omp.distribute { // CHECK-NEXT: omp.wsloop omp.wsloop { // CHECK-NEXT: omp.loop_nest From 01c368d9189d9e3c49ae9ad7c19b7c91289e4a43 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 6 Aug 2024 16:15:31 +0100 Subject: [PATCH 2/3] [Flang][OpenMP] Re-implement lowering of 'DISTRIBUTE PARALLEL DO' This patch updates the Flang lowering process for `distribute parallel do` to follow the "hoisted `omp.parallel`" representation. Now temporary allocations produced while lowering the loop body reside inside of that operation instead of the loop wrappers' parent region. Special handling of `omp.parallel` with regards to alloca creation is removed, as it's no longer necessary to make this distinction. Impacted Lit tests are updated according to the new representation. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 135 +++++++++------ flang/lib/Optimizer/Builder/FIRBuilder.cpp | 24 +-- flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp | 13 +- .../lib/Optimizer/Transforms/StackArrays.cpp | 30 +--- .../test/Lower/OpenMP/eval-outside-target.f90 | 44 ++--- .../Lower/OpenMP/hlfir-to-fir-conv-omp.mlir | 11 +- flang/test/Lower/OpenMP/if-clause.f90 | 160 +++++++++--------- flang/test/Lower/OpenMP/loop-compound.f90 | 12 +- flang/test/Lower/OpenMP/target_private.f90 | 10 +- 9 files changed, 211 insertions(+), 228 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 662d0b56665f32..245a5e63ea1b7b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1905,18 +1905,23 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return parallelOp; } -// TODO: Replace with genWrapperOp calls. 
-static mlir::omp::ParallelOp genParallelWrapperOp( +static mlir::omp::ParallelOp genParallelCompositeOp( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const mlir::omp::ParallelOperands &clauseOps, + const List &clauses, lower::pft::Evaluation &eval, + mlir::Location loc, mlir::omp::ParallelOperands &clauseOps, mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, llvm::ArrayRef reductionSyms, llvm::ArrayRef reductionTypes, mlir::omp::TargetOp parentTarget, DataSharingProcessor &dsp) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // Create omp.parallel wrapper. + if (enableDelayedPrivatization) { + const auto &privateClauseOps = dsp.getPrivateClauseOps(); + clauseOps.privateVars = privateClauseOps.privateVars; + clauseOps.privateSyms = privateClauseOps.privateSyms; + } + + // Create omp.parallel operation. auto parallelOp = firOpBuilder.create(loc, clauseOps); if (numThreadsClauseOps.numThreads) { @@ -1928,22 +1933,60 @@ static mlir::omp::ParallelOp genParallelWrapperOp( } // Populate entry block arguments with reduction and private variables. - mlir::OperandRange privateVars = parallelOp.getPrivateVars(); - llvm::SmallVector blockArgTypes(reductionTypes.begin(), reductionTypes.end()); - blockArgTypes.reserve(blockArgTypes.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(blockArgTypes), - [](mlir::Value v) { return v.getType(); }); - llvm::SmallVector blockArgLocs(reductionTypes.size(), loc); - blockArgLocs.reserve(blockArgLocs.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(blockArgLocs), - [](mlir::Value v) { return v.getLoc(); }); + llvm::SmallVector blockSyms(reductionSyms); + + if (enableDelayedPrivatization) { + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + + blockArgTypes.reserve(blockArgTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(blockArgTypes), + [](mlir::Value v) { return v.getType(); }); - firOpBuilder.createBlock(¶llelOp.getRegion(), {}, blockArgTypes, + blockArgLocs.reserve(blockArgLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(blockArgLocs), + [](mlir::Value v) { return v.getLoc(); }); + + llvm::append_range(blockSyms, dsp.getDelayedPrivSyms()); + } + + mlir::Region ®ion = parallelOp.getRegion(); + firOpBuilder.createBlock(®ion, /*insertPt=*/{}, blockArgTypes, blockArgLocs); + // Bind syms to block args. + unsigned argIdx = 0; + for (const semantics::Symbol *arg : blockSyms) { + auto bind = [&](const semantics::Symbol *sym) { + mlir::BlockArgument blockArg = region.getArgument(argIdx++); + converter.bindSymbol(*sym, hlfir::translateToExtendedValue( + loc, firOpBuilder, hlfir::Entity{blockArg}, + /*contiguousHint=*/ + evaluate::IsSimplyContiguous( + *sym, converter.getFoldingContext())) + .first); + }; + + if (const auto *commonDet = + arg->detailsIf()) { + for (const auto &mem : commonDet->objects()) + bind(&*mem); + } else + bind(arg); + } + + // Handle threadprivate and copyin, which would normally be done as part of + // `createBodyOfOp()`. However, when generating `omp.parallel` as part of a + // composite construct, we can't recursively lower its contents. This prevents + // us from being able to rely on the existing `genOpWithBody()` flow. 
+ { + mlir::OpBuilder::InsertionGuard guard(firOpBuilder); + threadPrivatizeVars(converter, eval); + } + ClauseProcessor(converter, semaCtx, clauses).processCopyin(); + firOpBuilder.setInsertionPoint( lower::genOpenMPTerminator(firOpBuilder, parallelOp, loc)); @@ -2505,11 +2548,7 @@ static void genCompositeDistributeParallelDo( findParentTargetOp(converter.getFirOpBuilder()); bool evalOutsideTarget = mustEvalTeamsThreadsOutsideTarget(eval, targetOp); - // Clause processing. - mlir::omp::DistributeOperands distributeClauseOps; - genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, - distributeClauseOps); - + // Create parent omp.parallel first. mlir::omp::ParallelOperands parallelClauseOps; mlir::omp::NumThreadsClauseOps numThreadsClauseOps; llvm::SmallVector parallelReductionSyms; @@ -2518,9 +2557,15 @@ static void genCompositeDistributeParallelDo( evalOutsideTarget, parallelClauseOps, numThreadsClauseOps, parallelReductionTypes, parallelReductionSyms); - const auto &privateClauseOps = dsp.getPrivateClauseOps(); - parallelClauseOps.privateVars = privateClauseOps.privateVars; - parallelClauseOps.privateSyms = privateClauseOps.privateSyms; + genParallelCompositeOp(converter, semaCtx, item->clauses, eval, loc, + parallelClauseOps, numThreadsClauseOps, + parallelReductionSyms, parallelReductionTypes, + evalOutsideTarget ? targetOp : nullptr, dsp); + + // Clause processing. + mlir::omp::DistributeOperands distributeClauseOps; + genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, + distributeClauseOps); mlir::omp::WsloopOperands wsloopClauseOps; llvm::SmallVector wsloopReductionSyms; @@ -2538,11 +2583,6 @@ static void genCompositeDistributeParallelDo( auto distributeOp = genWrapperOp( converter, loc, distributeClauseOps, /*blockArgTypes=*/{}); - auto parallelOp = genParallelWrapperOp( - converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps, - parallelReductionSyms, parallelReductionTypes, - evalOutsideTarget ? targetOp : nullptr, dsp); - // TODO: Add private variables to entry block arguments. auto wsloopOp = genWrapperOp( converter, loc, wsloopClauseOps, wsloopReductionTypes); @@ -2550,14 +2590,10 @@ static void genCompositeDistributeParallelDo( // Construct wrapper entry block list and associated symbols. It is important // that the symbol order and the block argument order match, so that the // symbol-value bindings created are correct. - auto wrapperSyms = - llvm::to_vector(llvm::concat( - parallelReductionSyms, dsp.getDelayedPrivSyms(), - wsloopReductionSyms)); + auto &wrapperSyms = wsloopReductionSyms; auto wrapperArgs = llvm::to_vector( llvm::concat(distributeOp.getRegion().getArguments(), - parallelOp.getRegion().getArguments(), wsloopOp.getRegion().getArguments())); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, @@ -2576,11 +2612,7 @@ static void genCompositeDistributeParallelDoSimd( findParentTargetOp(converter.getFirOpBuilder()); bool evalOutsideTarget = mustEvalTeamsThreadsOutsideTarget(eval, targetOp); - // Clause processing. - mlir::omp::DistributeOperands distributeClauseOps; - genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, - distributeClauseOps); - + // Create parent omp.parallel first. 
mlir::omp::ParallelOperands parallelClauseOps; mlir::omp::NumThreadsClauseOps numThreadsClauseOps; llvm::SmallVector parallelReductionSyms; @@ -2589,9 +2621,15 @@ static void genCompositeDistributeParallelDoSimd( evalOutsideTarget, parallelClauseOps, numThreadsClauseOps, parallelReductionTypes, parallelReductionSyms); - const auto &privateClauseOps = dsp.getPrivateClauseOps(); - parallelClauseOps.privateVars = privateClauseOps.privateVars; - parallelClauseOps.privateSyms = privateClauseOps.privateSyms; + genParallelCompositeOp(converter, semaCtx, item->clauses, eval, loc, + parallelClauseOps, numThreadsClauseOps, + parallelReductionSyms, parallelReductionTypes, + evalOutsideTarget ? targetOp : nullptr, dsp); + + // Clause processing. + mlir::omp::DistributeOperands distributeClauseOps; + genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, + distributeClauseOps); mlir::omp::WsloopOperands wsloopClauseOps; llvm::SmallVector wsloopReductionSyms; @@ -2612,11 +2650,6 @@ static void genCompositeDistributeParallelDoSimd( auto distributeOp = genWrapperOp( converter, loc, distributeClauseOps, /*blockArgTypes=*/{}); - auto parallelOp = genParallelWrapperOp( - converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps, - parallelReductionSyms, parallelReductionTypes, - evalOutsideTarget ? targetOp : nullptr, dsp); - // TODO: Add private variables to entry block arguments. auto wsloopOp = genWrapperOp( converter, loc, wsloopClauseOps, wsloopReductionTypes); @@ -2628,14 +2661,10 @@ static void genCompositeDistributeParallelDoSimd( // Construct wrapper entry block list and associated symbols. It is important // that the symbol order and the block argument order match, so that the // symbol-value bindings created are correct. - auto wrapperSyms = - llvm::to_vector(llvm::concat( - parallelReductionSyms, dsp.getDelayedPrivSyms(), - wsloopReductionSyms)); + auto &wrapperSyms = wsloopReductionSyms; auto wrapperArgs = llvm::to_vector(llvm::concat( distributeOp.getRegion().getArguments(), - parallelOp.getRegion().getArguments(), wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, @@ -2756,10 +2785,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter, bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) == llvm::omp::Association::Loop; if (loopLeaf) { + // Used delayed privatization for 'distribute parallel do [simd]'. + bool useDelayedPrivatization = llvm::omp::allParallelSet.test(item->id); symTable.pushScope(); loopDsp.emplace(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); + useDelayedPrivatization, &symTable); loopDsp->processStep1(); loopDsp->processStep2(); } diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index c45d3b9d7a2f3d..d54715d3fa3f56 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -256,19 +256,7 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { if (auto ompOutlineableIface = getRegion() .getParentOfType()) { - // omp.parallel can work as a block construct but it can also be a loop - // wrapper when part of a composite construct. Make sure it's only treated - // as a block if it's not a wrapper. 
- auto parallelOp = - llvm::dyn_cast(*ompOutlineableIface); - if (!parallelOp || !llvm::isa_and_present( - parallelOp->getParentOp())) - return ompOutlineableIface.getAllocaBlock(); - - if (auto parentOutlineable = - parallelOp - ->getParentOfType()) - return parentOutlineable.getAllocaBlock(); + return ompOutlineableIface.getAllocaBlock(); } if (auto recipeIface = @@ -285,15 +273,9 @@ mlir::Value fir::FirOpBuilder::createTemporaryAlloc( llvm::ArrayRef attrs) { assert(!mlir::isa(type) && "cannot be a reference"); // If the alloca is inside an OpenMP Op which will be outlined then pin - // the alloca here. Make sure that an omp.parallel operation that is taking - // a loop wrapper role is not detected as outlineable here. - auto iface = - getRegion().getParentOfType(); - auto parallelOp = - iface ? llvm::dyn_cast(*iface) : nullptr; + // the alloca here. const bool pinned = - iface && (!parallelOp || !llvm::isa_and_present( - parallelOp->getParentOp())); + getRegion().getParentOfType(); mlir::Value temp = create(loc, type, /*unique_name=*/llvm::StringRef{}, name, pinned, lenParams, shape, attrs); diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index 22026f1d258716..0e114f069be66d 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -285,16 +285,9 @@ mlir::Value ConvertFIRToLLVMPattern::computeBoxSize( // 4. The first ancestor that is one of the above. mlir::Block *ConvertFIRToLLVMPattern::getBlockForAllocaInsert( mlir::Operation *op, mlir::Region *parentRegion) const { - if (auto iface = - mlir::dyn_cast(op)) { - // omp.parallel can work as a block construct but it can also be a loop - // wrapper when it's part of a composite construct. Make sure it's only - // treated as a block if it's not a wrapper. - auto parallelOp = llvm::dyn_cast(*iface); - if (!parallelOp || !llvm::isa_and_present( - parallelOp->getParentOp())) - return iface.getAllocaBlock(); - } + if (auto outlineableIface = + mlir::dyn_cast(op)) + return outlineableIface.getAllocaBlock(); if (auto recipeIface = mlir::dyn_cast(op)) return recipeIface.getAllocaBlock(*parentRegion); if (auto llvmFuncOp = mlir::dyn_cast(op)) diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp index d6eae5ec5fdde0..bdc2d9cd9c6c43 100644 --- a/flang/lib/Optimizer/Transforms/StackArrays.cpp +++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp @@ -589,31 +589,8 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { return {point}; }; - // Find the first OpenMP outlineable parent region while taking into account - // the possibility of finding an omp.parallel region that is taking a loop - // wrapper role. These operations must be skipped, as they cannot hold - // allocations. 
- const auto findOmpRegion = [](mlir::Operation *op) { - auto findOmpRegionImpl = - [](mlir::Operation *op, - auto &findOmpRegion) -> mlir::omp::OutlineableOpenMPOpInterface { - auto ompRegion = - op->getParentOfType(); - if (!ompRegion) - return nullptr; - - if (auto parallelOp = - mlir::dyn_cast_if_present(*ompRegion)) { - mlir::Operation *parentOp = parallelOp->getParentOp(); - if (mlir::isa_and_present(parentOp)) - return findOmpRegion(parentOp, findOmpRegion); - } - return ompRegion; - }; - return findOmpRegionImpl(op, findOmpRegionImpl); - }; - - auto oldOmpRegion = findOmpRegion(oldAlloc); + auto oldOmpRegion = + oldAlloc->getParentOfType(); // Find when the last operand value becomes available mlir::Block *operandsBlock = nullptr; @@ -641,7 +618,8 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { LLVM_DEBUG(llvm::dbgs() << "--Placing after last operand: " << *lastOperand << "\n"); // check we aren't moving out of an omp region - auto lastOpOmpRegion = findOmpRegion(lastOperand); + auto lastOpOmpRegion = + lastOperand->getParentOfType(); if (lastOpOmpRegion == oldOmpRegion) return checkReturn(lastOperand); // Presumably this happened because the operands became ready before the diff --git a/flang/test/Lower/OpenMP/eval-outside-target.f90 b/flang/test/Lower/OpenMP/eval-outside-target.f90 index 5d4a8a104c8952..ef578610e8e908 100644 --- a/flang/test/Lower/OpenMP/eval-outside-target.f90 +++ b/flang/test/Lower/OpenMP/eval-outside-target.f90 @@ -2,12 +2,11 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s --check-prefixes=BOTH,HOST ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE -! CHECK-LABEL: func.func @_QPteams +! BOTH-LABEL: func.func @_QPteams subroutine teams() ! BOTH: omp.target - ! HOST-SAME: num_teams({{.*}}) - ! HOST-SAME: teams_thread_limit({{.*}}) + ! HOST-SAME: num_teams({{.*}}) teams_thread_limit({{.*}}) ! DEVICE-NOT: num_teams({{.*}}) ! DEVICE-NOT: teams_thread_limit({{.*}}) @@ -20,8 +19,7 @@ subroutine teams() ! HOST-NOT: thread_limit({{.*}}) ! HOST-SAME: { - ! DEVICE-SAME: num_teams({{.*}}) - ! DEVICE-SAME: thread_limit({{.*}}) + ! DEVICE-SAME: num_teams({{.*}}) thread_limit({{.*}}) !$omp teams num_teams(1) thread_limit(2) call foo() !$omp end teams @@ -29,13 +27,13 @@ subroutine teams() !$omp end target ! BOTH: omp.teams - ! BOTH-SAME: num_teams({{.*}}) - ! BOTH-SAME: thread_limit({{.*}}) + ! BOTH-SAME: num_teams({{.*}}) thread_limit({{.*}}) !$omp teams num_teams(1) thread_limit(2) call foo() !$omp end teams end subroutine teams +! BOTH-LABEL: func.func @_QPparallel subroutine parallel() ! BOTH: omp.target @@ -76,6 +74,7 @@ subroutine parallel() !$omp end parallel end subroutine parallel +! BOTH-LABEL: func.func @_QPdistribute_parallel_do subroutine distribute_parallel_do() ! BOTH: omp.target @@ -87,14 +86,14 @@ subroutine distribute_parallel_do() ! BOTH: omp.teams !$omp target teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! HOST-NOT: num_threads({{.*}}) ! HOST-SAME: { - + ! DEVICE-SAME: num_threads({{.*}}) - + + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop !$omp distribute parallel do num_threads(1) do i=1,10 @@ -110,9 +109,9 @@ subroutine distribute_parallel_do() !$omp target teams call foo() - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! 
BOTH-NEXT: omp.wsloop !$omp distribute parallel do num_threads(1) do i=1,10 @@ -124,9 +123,9 @@ subroutine distribute_parallel_do() ! BOTH: omp.teams !$omp teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop !$omp distribute parallel do num_threads(1) do i=1,10 @@ -136,6 +135,7 @@ subroutine distribute_parallel_do() !$omp end teams end subroutine distribute_parallel_do +! BOTH-LABEL: func.func @_QPdistribute_parallel_do_simd subroutine distribute_parallel_do_simd() ! BOTH: omp.target @@ -147,14 +147,14 @@ subroutine distribute_parallel_do_simd() ! BOTH: omp.teams !$omp target teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! HOST-NOT: num_threads({{.*}}) ! HOST-SAME: { ! DEVICE-SAME: num_threads({{.*}}) - + + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop ! BOTH-NEXT: omp.simd !$omp distribute parallel do simd num_threads(1) @@ -171,9 +171,9 @@ subroutine distribute_parallel_do_simd() !$omp target teams call foo() - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop ! BOTH-NEXT: omp.simd !$omp distribute parallel do simd num_threads(1) @@ -186,9 +186,9 @@ subroutine distribute_parallel_do_simd() ! BOTH: omp.teams !$omp teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop ! BOTH-NEXT: omp.simd !$omp distribute parallel do simd num_threads(1) diff --git a/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir b/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir index 62f93efde9c643..61bc8cc60ecd22 100644 --- a/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir +++ b/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir @@ -17,8 +17,6 @@ func.func @_QPfoo() { // CHECK: omp.target omp.target map_entries(%map_info -> %arg1 : !fir.ref>) { ^bb0(%arg1: !fir.ref>): - - // CHECK: %[[TO_BOX_ALLOC:.*]] = fir.alloca !fir.box> {pinned} %c1_2 = arith.constant 1 : index %21 = fir.shape %c1_2 : (index) -> !fir.shape<1> @@ -30,10 +28,11 @@ func.func @_QPfoo() { %c1_3 = arith.constant 1 : i32 %c10 = arith.constant 10 : i32 - // CHECK: omp.distribute - omp.distribute { - // CHECK: omp.parallel - omp.parallel { + // CHECK: omp.parallel + omp.parallel { + // CHECK: %[[TO_BOX_ALLOC:.*]] = fir.alloca !fir.box> {pinned} + // CHECK: omp.distribute + omp.distribute { // CHECK: omp.wsloop omp.wsloop { // CHECK: omp.loop_nest diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index 402fbe62df58ad..402fd56b2fd816 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -19,10 +19,10 @@ program main ! ---------------------------------------------------------------------------- !$omp teams - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -36,12 +36,12 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! 
CHECK-SAME: { @@ -53,12 +53,12 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -70,12 +70,12 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -87,10 +87,10 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -111,10 +111,10 @@ program main ! ---------------------------------------------------------------------------- !$omp teams - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -125,12 +125,12 @@ program main end do !$omp end distribute parallel do - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -139,12 +139,12 @@ program main end do !$omp end distribute parallel do - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -823,10 +823,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -843,12 +843,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -863,12 +863,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -883,10 +883,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -903,10 +903,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! 
CHECK-SAME: { ! CHECK: omp.wsloop @@ -923,12 +923,12 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -946,10 +946,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -966,12 +966,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -986,12 +986,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1006,10 +1006,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1026,10 +1026,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1046,12 +1046,12 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1066,10 +1066,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1314,10 +1314,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1331,12 +1331,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1348,12 +1348,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! 
CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1365,10 +1365,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1382,12 +1382,12 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1402,10 +1402,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1422,12 +1422,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1442,12 +1442,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1462,10 +1462,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1482,11 +1482,11 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1501,10 +1501,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop diff --git a/flang/test/Lower/OpenMP/loop-compound.f90 b/flang/test/Lower/OpenMP/loop-compound.f90 index 05d373d3dcdacb..cb614a0debdaf5 100644 --- a/flang/test/Lower/OpenMP/loop-compound.f90 +++ b/flang/test/Lower/OpenMP/loop-compound.f90 @@ -14,8 +14,8 @@ program main ! ---------------------------------------------------------------------------- !$omp teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest @@ -31,8 +31,8 @@ program main ! ---------------------------------------------------------------------------- !$omp teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.loop_nest !$omp distribute parallel do @@ -144,8 +144,8 @@ program main ! 
---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest @@ -159,8 +159,8 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute parallel do @@ -196,8 +196,8 @@ program main ! TEAMS DISTRIBUTE PARALLEL DO SIMD ! ---------------------------------------------------------------------------- ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest @@ -210,8 +210,8 @@ program main ! TEAMS DISTRIBUTE PARALLEL DO ! ---------------------------------------------------------------------------- ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.loop_nest !$omp teams distribute parallel do diff --git a/flang/test/Lower/OpenMP/target_private.f90 b/flang/test/Lower/OpenMP/target_private.f90 index 52471206113ff6..e45d128f41db36 100644 --- a/flang/test/Lower/OpenMP/target_private.f90 +++ b/flang/test/Lower/OpenMP/target_private.f90 @@ -45,14 +45,14 @@ subroutine omp_target_target_do_simd() !CHECK: %[[IV:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "iv"} !CHECK: %[[VAR:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "var"} !CHECK: omp.target -!CHECK-SAME: map_entries(%[[IV]] -> %{{.*}}, %[[VAR]] -> %{{.*}} +!CHECK-SAME: map_entries(%[[IV]] -> %[[MAP_IV:.*]], %[[VAR]] -> %[[MAP_VAR:.*]] : !fir.ref, !fir.ref) +!CHECK: %[[MAP_IV_DECL:.*]]:2 = hlfir.declare %[[MAP_IV]] +!CHECK: %[[MAP_VAR_DECL:.*]]:2 = hlfir.declare %[[MAP_VAR]] !CHECK: omp.teams { -!CHECK: %[[IV_PRIV:.*]] = fir.alloca i64 {bindc_name = "iv" +!CHECK: omp.parallel private(@{{.*}} %[[MAP_IV_DECL]]#0 -> %[[IV_PRIV:.*]] : !fir.ref, @{{.*}} %[[MAP_VAR_DECL]]#0 -> %[[VAR_PRIV:.*]] : !fir.ref) { !CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[IV_PRIV]] -!CHECK: %[[VAR_PRIV:.*]] = fir.alloca f64 {bindc_name = "var" !CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_PRIV]] -!CHECK: omp.distribute { -!CHECK-NEXT: omp.parallel { +!CHECK: omp.distribute { !CHECK-NEXT: omp.wsloop { !CHECK-NEXT: omp.simd { !CHECK-NEXT: omp.loop_nest From 9a95922fbdb98440390a342f95b44bb619394c08 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Wed, 7 Aug 2024 11:47:42 +0100 Subject: [PATCH 3/3] [Flang][OpenMP] Update DO CONCURRENT conversion for the device This patch makes changes to the `DoConcurrentConversion` pass to follow the "hoisted omp.parallel" representation when converting `do concurrent` constructs into `target teams distribute parallel do`. 
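
For reference, the op nesting this conversion targets for a simple one-level
`do concurrent` loop mapped to the device looks roughly as follows. This is a
minimal sketch following the updated basic_device.f90 expectations; clauses,
types and SSA values are elided or illustrative:

  omp.target {
    omp.teams {
      omp.parallel {
        // Loop-local allocations (e.g. the induction variable "i") and the
        // loop-bound computations are emitted here, inside the hoisted
        // omp.parallel region, rather than outside the loop wrappers.
        %iv = fir.alloca i32 {bindc_name = "i"}
        omp.distribute {
          omp.wsloop {
            omp.loop_nest (%arg0) : index = (%lb) to (%ub) inclusive step (%step) {
              // ... store %arg0 to %iv and lower the loop body ...
              omp.yield
            }
            omp.terminator
          }
          omp.terminator
        }
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }

The difference from the previous form is that `omp.parallel` is no longer a
loop wrapper nested under `omp.distribute`; it sits above it as a regular
block-associated construct, which is what allows loop-local allocations to be
placed directly in its region.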
--- .../Transforms/DoConcurrentConversion.cpp | 23 +++++++------------ .../Transforms/DoConcurrent/basic_device.f90 | 3 +-- .../multiple_iteration_ranges.f90 | 5 ++-- .../DoConcurrent/not_perfectly_nested.f90 | 4 +++- .../DoConcurrent/skip_all_nested_loops.f90 | 4 +++- 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index a5de824c6cc0f7..912d33e0e38e9d 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -599,9 +599,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, outermostLoopLives, targetClauseOps); - genTeamsOp(doLoop.getLoc(), rewriter, loopNest, mapper, - loopNestClauseOps); - genDistributeOp(doLoop.getLoc(), rewriter); + genTeamsOp(doLoop.getLoc(), rewriter); } mlir::omp::ParallelOp parallelOp = genParallelOp( @@ -611,6 +609,9 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), rewriter); + if (mapToDevice) + genDistributeOp(doLoop.getLoc(), rewriter); + mlir::omp::LoopNestOp ompLoopNest = genWsLoopOp(rewriter, loopNest.back().first, mapper, loopNestClauseOps); @@ -800,18 +801,14 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } mlir::omp::TeamsOp - genTeamsOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, - looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper, - mlir::omp::LoopNestOperands &loopNestClauseOps) const { + genTeamsOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { auto teamsOp = rewriter.create( loc, /*clauses=*/mlir::omp::TeamsOperands{}); rewriter.createBlock(&teamsOp.getRegion()); rewriter.setInsertionPoint(rewriter.create(loc)); - genLoopNestIndVarAllocs(rewriter, loopNest, mapper); - genLoopNestClauseOps(loc, rewriter, loopNest, mapper, loopNestClauseOps); - return teamsOp; } @@ -905,12 +902,8 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { rewriter.createBlock(¶llelOp.getRegion()); rewriter.setInsertionPoint(rewriter.create(loc)); - // If mapping to host, the local induction variable and loop bounds need to - // be emitted as part of the `omp.parallel` op. - if (!mapToDevice) { - genLoopNestIndVarAllocs(rewriter, loopNest, mapper); - genLoopNestClauseOps(loc, rewriter, loopNest, mapper, loopNestClauseOps); - } + genLoopNestIndVarAllocs(rewriter, loopNest, mapper); + genLoopNestClauseOps(loc, rewriter, loopNest, mapper, loopNestClauseOps); return parallelOp; } diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index b3d0f91ddd3e18..7873fa4f88db6d 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -43,6 +43,7 @@ program do_concurrent_basic ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] ! CHECK: omp.teams { + ! CHECK-NEXT: omp.parallel { ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -54,8 +55,6 @@ program do_concurrent_basic ! CHECK: %[[STEP:.*]] = arith.constant 1 : index ! CHECK-NEXT: omp.distribute { - ! CHECK-NEXT: omp.parallel { - ! CHECK-NEXT: omp.wsloop { ! 
CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index a0364612976bcb..17cf27a9b70b27 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -65,7 +65,7 @@ program main ! DEVICE: omp.target ! DEVICE: omp.teams -! HOST: omp.parallel { +! COMMON: omp.parallel { ! COMMON-NEXT: %[[ITER_VAR_I:.*]] = fir.alloca i32 {bindc_name = "i"} ! COMMON-NEXT: %[[BINDING_I:.*]]:2 = hlfir.declare %[[ITER_VAR_I]] {uniq_name = "_QFEi"} @@ -94,8 +94,7 @@ program main ! COMMON: %[[UB_K:.*]] = fir.convert %[[C30]] : (i32) -> index ! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index -! DEVICE: omp.distribute -! DEVICE-NEXT: omp.parallel +! DEVICE: omp.distribute ! COMMON: omp.wsloop { ! COMMON-NEXT: omp.loop_nest diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 index 559d26c39cba55..f3f2e78f5b3183 100644 --- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 @@ -39,9 +39,11 @@ program main ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} ! DEVICE: omp.teams -! DEVICE: omp.distribute ! COMMON: omp.parallel { + +! DEVICE: omp.distribute + ! COMMON: omp.wsloop { ! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { ! COMMON: fir.do_loop %[[J_IV:.*]] = {{.*}} { diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 index 362b8685cfd15b..429500cead1073 100644 --- a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 @@ -39,9 +39,11 @@ program main ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} ! DEVICE: omp.teams -! DEVICE: omp.distribute ! COMMON: omp.parallel { + +! DEVICE: omp.distribute + ! COMMON: omp.wsloop { ! COMMON: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} { ! COMMON: fir.do_loop {{.*}} iter_args(%[[J_IV:.*]] = {{.*}}) -> {{.*}} {