From 13ae6e0fb17ff95c13a8c8ce70455a99dcc30cc8 Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Tue, 13 Aug 2024 11:27:14 +0100
Subject: [PATCH] [Flang][OpenMP] Re-implement lowering of DISTRIBUTE PARALLEL
 DO (#135)

This patch updates the Flang lowering of `distribute parallel do` to follow
the "hoisted `omp.parallel`" representation: temporary allocations produced
while lowering the loop body now reside inside that operation rather than in
the parent region of the loop wrappers.

The special handling of `omp.parallel` with regard to alloca creation is
removed, since that distinction is no longer necessary. Affected Lit tests
are updated to match the new representation.

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 135 +++++++++------
 flang/lib/Optimizer/Builder/FIRBuilder.cpp    |  24 +--
 flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp |  13 +-
 .../lib/Optimizer/Transforms/StackArrays.cpp  |  30 +---
 .../test/Lower/OpenMP/eval-outside-target.f90 |  44 ++---
 .../Lower/OpenMP/hlfir-to-fir-conv-omp.mlir   |  11 +-
 flang/test/Lower/OpenMP/if-clause.f90         | 160 +++++++++---------
 flang/test/Lower/OpenMP/loop-compound.f90     |  12 +-
 flang/test/Lower/OpenMP/target_private.f90    |  10 +-
 9 files changed, 211 insertions(+), 228 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 662d0b56665f32..245a5e63ea1b7b 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1905,18 +1905,23 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   return parallelOp;
 }
 
-// TODO: Replace with genWrapperOp calls.
-static mlir::omp::ParallelOp genParallelWrapperOp(
+static mlir::omp::ParallelOp genParallelCompositeOp(
     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
-    lower::pft::Evaluation &eval, mlir::Location loc,
-    const mlir::omp::ParallelOperands &clauseOps,
+    const List<Clause> &clauses, lower::pft::Evaluation &eval,
+    mlir::Location loc, mlir::omp::ParallelOperands &clauseOps,
     mlir::omp::NumThreadsClauseOps &numThreadsClauseOps,
     llvm::ArrayRef<const semantics::Symbol *> reductionSyms,
    llvm::ArrayRef<mlir::Type> reductionTypes, mlir::omp::TargetOp parentTarget,
     DataSharingProcessor &dsp) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  // Create omp.parallel wrapper.
+  if (enableDelayedPrivatization) {
+    const auto &privateClauseOps = dsp.getPrivateClauseOps();
+    clauseOps.privateVars = privateClauseOps.privateVars;
+    clauseOps.privateSyms = privateClauseOps.privateSyms;
+  }
+
+  // Create omp.parallel operation.
   auto parallelOp = firOpBuilder.create<mlir::omp::ParallelOp>(loc, clauseOps);
 
   if (numThreadsClauseOps.numThreads) {
@@ -1928,22 +1933,60 @@ static mlir::omp::ParallelOp genParallelWrapperOp(
   }
 
   // Populate entry block arguments with reduction and private variables.
-  mlir::OperandRange privateVars = parallelOp.getPrivateVars();
-
   llvm::SmallVector<mlir::Type> blockArgTypes(reductionTypes.begin(),
                                               reductionTypes.end());
-  blockArgTypes.reserve(blockArgTypes.size() + privateVars.size());
-  llvm::transform(privateVars, std::back_inserter(blockArgTypes),
-                  [](mlir::Value v) { return v.getType(); });
-
   llvm::SmallVector<mlir::Location> blockArgLocs(reductionTypes.size(), loc);
-  blockArgLocs.reserve(blockArgLocs.size() + privateVars.size());
-  llvm::transform(privateVars, std::back_inserter(blockArgLocs),
-                  [](mlir::Value v) { return v.getLoc(); });
+  llvm::SmallVector<const semantics::Symbol *> blockSyms(reductionSyms);
+
+  if (enableDelayedPrivatization) {
+    mlir::OperandRange privateVars = parallelOp.getPrivateVars();
+
+    blockArgTypes.reserve(blockArgTypes.size() + privateVars.size());
+    llvm::transform(privateVars, std::back_inserter(blockArgTypes),
+                    [](mlir::Value v) { return v.getType(); });
 
-  firOpBuilder.createBlock(&parallelOp.getRegion(), {}, blockArgTypes,
+    blockArgLocs.reserve(blockArgLocs.size() + privateVars.size());
+    llvm::transform(privateVars, std::back_inserter(blockArgLocs),
+                    [](mlir::Value v) { return v.getLoc(); });
+
+    llvm::append_range(blockSyms, dsp.getDelayedPrivSyms());
+  }
+
+  mlir::Region &region = parallelOp.getRegion();
+  firOpBuilder.createBlock(&region, /*insertPt=*/{}, blockArgTypes,
                            blockArgLocs);
 
+  // Bind syms to block args.
+  unsigned argIdx = 0;
+  for (const semantics::Symbol *arg : blockSyms) {
+    auto bind = [&](const semantics::Symbol *sym) {
+      mlir::BlockArgument blockArg = region.getArgument(argIdx++);
+      converter.bindSymbol(*sym, hlfir::translateToExtendedValue(
+                                     loc, firOpBuilder, hlfir::Entity{blockArg},
+                                     /*contiguousHint=*/
+                                     evaluate::IsSimplyContiguous(
+                                         *sym, converter.getFoldingContext()))
+                                     .first);
+    };
+
+    if (const auto *commonDet =
+            arg->detailsIf<semantics::CommonBlockDetails>()) {
+      for (const auto &mem : commonDet->objects())
+        bind(&*mem);
+    } else
+      bind(arg);
+  }
+
+  // Handle threadprivate and copyin, which would normally be done as part of
+  // `createBodyOfOp()`. However, when generating `omp.parallel` as part of a
+  // composite construct, we can't recursively lower its contents, so we can't
+  // rely on the existing `genOpWithBody()` flow.
+  {
+    mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
+    threadPrivatizeVars(converter, eval);
+  }
+  ClauseProcessor(converter, semaCtx, clauses).processCopyin();
+
   firOpBuilder.setInsertionPoint(
       lower::genOpenMPTerminator(firOpBuilder, parallelOp, loc));
@@ -2505,11 +2548,7 @@ static void genCompositeDistributeParallelDo(
       findParentTargetOp(converter.getFirOpBuilder());
   bool evalOutsideTarget = mustEvalTeamsThreadsOutsideTarget(eval, targetOp);
 
-  // Clause processing.
-  mlir::omp::DistributeOperands distributeClauseOps;
-  genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
-                       distributeClauseOps);
-
+  // Create parent omp.parallel first.
   mlir::omp::ParallelOperands parallelClauseOps;
   mlir::omp::NumThreadsClauseOps numThreadsClauseOps;
   llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
   llvm::SmallVector<mlir::Type> parallelReductionTypes;
   genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
@@ -2518,9 +2557,15 @@ static void genCompositeDistributeParallelDo(
                      evalOutsideTarget, parallelClauseOps, numThreadsClauseOps,
                      parallelReductionTypes, parallelReductionSyms);
 
-  const auto &privateClauseOps = dsp.getPrivateClauseOps();
-  parallelClauseOps.privateVars = privateClauseOps.privateVars;
-  parallelClauseOps.privateSyms = privateClauseOps.privateSyms;
+  genParallelCompositeOp(converter, semaCtx, item->clauses, eval, loc,
+                         parallelClauseOps, numThreadsClauseOps,
+                         parallelReductionSyms, parallelReductionTypes,
+                         evalOutsideTarget ? targetOp : nullptr, dsp);
+
+  // Clause processing.
+  mlir::omp::DistributeOperands distributeClauseOps;
+  genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
+                       distributeClauseOps);
 
   mlir::omp::WsloopOperands wsloopClauseOps;
   llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
@@ -2538,11 +2583,6 @@ static void genCompositeDistributeParallelDo(
   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
       converter, loc, distributeClauseOps, /*blockArgTypes=*/{});
 
-  auto parallelOp = genParallelWrapperOp(
-      converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps,
-      parallelReductionSyms, parallelReductionTypes,
-      evalOutsideTarget ? targetOp : nullptr, dsp);
-
   // TODO: Add private variables to entry block arguments.
   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
       converter, loc, wsloopClauseOps, wsloopReductionTypes);
@@ -2550,14 +2590,10 @@ static void genCompositeDistributeParallelDo(
   // Construct wrapper entry block list and associated symbols. It is important
   // that the symbol order and the block argument order match, so that the
   // symbol-value bindings created are correct.
-  auto wrapperSyms =
-      llvm::to_vector(llvm::concat<const semantics::Symbol *>(
-          parallelReductionSyms, dsp.getDelayedPrivSyms(),
-          wsloopReductionSyms));
+  auto &wrapperSyms = wsloopReductionSyms;
   auto wrapperArgs = llvm::to_vector(
       llvm::concat<mlir::BlockArgument>(distributeOp.getRegion().getArguments(),
-                                        parallelOp.getRegion().getArguments(),
                                         wsloopOp.getRegion().getArguments()));
 
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
@@ -2576,11 +2612,7 @@ static void genCompositeDistributeParallelDoSimd(
       findParentTargetOp(converter.getFirOpBuilder());
   bool evalOutsideTarget = mustEvalTeamsThreadsOutsideTarget(eval, targetOp);
 
-  // Clause processing.
-  mlir::omp::DistributeOperands distributeClauseOps;
-  genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
-                       distributeClauseOps);
-
+  // Create parent omp.parallel first.
   mlir::omp::ParallelOperands parallelClauseOps;
   mlir::omp::NumThreadsClauseOps numThreadsClauseOps;
   llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
   llvm::SmallVector<mlir::Type> parallelReductionTypes;
   genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
@@ -2589,9 +2621,15 @@ static void genCompositeDistributeParallelDoSimd(
                      evalOutsideTarget, parallelClauseOps, numThreadsClauseOps,
                      parallelReductionTypes, parallelReductionSyms);
 
-  const auto &privateClauseOps = dsp.getPrivateClauseOps();
-  parallelClauseOps.privateVars = privateClauseOps.privateVars;
-  parallelClauseOps.privateSyms = privateClauseOps.privateSyms;
+  genParallelCompositeOp(converter, semaCtx, item->clauses, eval, loc,
+                         parallelClauseOps, numThreadsClauseOps,
+                         parallelReductionSyms, parallelReductionTypes,
+                         evalOutsideTarget ? targetOp : nullptr, dsp);
+
+  // Clause processing.
+  mlir::omp::DistributeOperands distributeClauseOps;
+  genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
+                       distributeClauseOps);
 
   mlir::omp::WsloopOperands wsloopClauseOps;
   llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
@@ -2612,11 +2650,6 @@ static void genCompositeDistributeParallelDoSimd(
   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
       converter, loc, distributeClauseOps, /*blockArgTypes=*/{});
 
-  auto parallelOp = genParallelWrapperOp(
-      converter, semaCtx, eval, loc, parallelClauseOps, numThreadsClauseOps,
-      parallelReductionSyms, parallelReductionTypes,
-      evalOutsideTarget ? targetOp : nullptr, dsp);
-
   // TODO: Add private variables to entry block arguments.
   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
       converter, loc, wsloopClauseOps, wsloopReductionTypes);
@@ -2628,14 +2661,10 @@ static void genCompositeDistributeParallelDoSimd(
   // Construct wrapper entry block list and associated symbols. It is important
   // that the symbol order and the block argument order match, so that the
   // symbol-value bindings created are correct.
-  auto wrapperSyms =
-      llvm::to_vector(llvm::concat<const semantics::Symbol *>(
-          parallelReductionSyms, dsp.getDelayedPrivSyms(),
-          wsloopReductionSyms));
+  auto &wrapperSyms = wsloopReductionSyms;
   auto wrapperArgs = llvm::to_vector(llvm::concat<mlir::BlockArgument>(
       distributeOp.getRegion().getArguments(),
-      parallelOp.getRegion().getArguments(),
       wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments()));
 
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
@@ -2756,10 +2785,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
   bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
                   llvm::omp::Association::Loop;
   if (loopLeaf) {
+    // Use delayed privatization for 'distribute parallel do [simd]'.
+    bool useDelayedPrivatization = llvm::omp::allParallelSet.test(item->id);
     symTable.pushScope();
     loopDsp.emplace(converter, semaCtx, item->clauses, eval,
                     /*shouldCollectPreDeterminedSymbols=*/true,
-                    /*useDelayedPrivatization=*/false, &symTable);
+                    useDelayedPrivatization, &symTable);
     loopDsp->processStep1();
     loopDsp->processStep2();
   }
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index c45d3b9d7a2f3d..d54715d3fa3f56 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -256,19 +256,7 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
   if (auto ompOutlineableIface =
           getRegion()
               .getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>()) {
-    // omp.parallel can work as a block construct but it can also be a loop
-    // wrapper when part of a composite construct. Make sure it's only treated
-    // as a block if it's not a wrapper.
-    auto parallelOp =
-        llvm::dyn_cast<mlir::omp::ParallelOp>(*ompOutlineableIface);
-    if (!parallelOp || !llvm::isa_and_present<mlir::omp::DistributeOp>(
-                           parallelOp->getParentOp()))
-      return ompOutlineableIface.getAllocaBlock();
-
-    if (auto parentOutlineable =
-            parallelOp
-                ->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>())
-      return parentOutlineable.getAllocaBlock();
+    return ompOutlineableIface.getAllocaBlock();
   }
 
   if (auto recipeIface =
@@ -285,15 +273,9 @@ mlir::Value fir::FirOpBuilder::createTemporaryAlloc(
     llvm::ArrayRef<mlir::NamedAttribute> attrs) {
   assert(!mlir::isa<fir::ReferenceType>(type) && "cannot be a reference");
   // If the alloca is inside an OpenMP Op which will be outlined then pin
-  // the alloca here. Make sure that an omp.parallel operation that is taking
-  // a loop wrapper role is not detected as outlineable here.
-  auto iface =
-      getRegion().getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
-  auto parallelOp =
-      iface ? llvm::dyn_cast<mlir::omp::ParallelOp>(*iface) : nullptr;
+  // the alloca here.
   const bool pinned =
-      iface && (!parallelOp || !llvm::isa_and_present<mlir::omp::DistributeOp>(
-                                   parallelOp->getParentOp()));
+      getRegion().getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
   mlir::Value temp =
       create<fir::AllocaOp>(loc, type, /*unique_name=*/llvm::StringRef{}, name,
                             pinned, lenParams, shape, attrs);
diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
index 22026f1d258716..0e114f069be66d 100644
--- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
+++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp
@@ -285,16 +285,9 @@ mlir::Value ConvertFIRToLLVMPattern::computeBoxSize(
 // 4. The first ancestor that is one of the above.
 mlir::Block *ConvertFIRToLLVMPattern::getBlockForAllocaInsert(
     mlir::Operation *op, mlir::Region *parentRegion) const {
-  if (auto iface =
-          mlir::dyn_cast<mlir::omp::OutlineableOpenMPOpInterface>(op)) {
-    // omp.parallel can work as a block construct but it can also be a loop
-    // wrapper when it's part of a composite construct. Make sure it's only
-    // treated as a block if it's not a wrapper.
-    auto parallelOp = llvm::dyn_cast<mlir::omp::ParallelOp>(*iface);
-    if (!parallelOp || !llvm::isa_and_present<mlir::omp::DistributeOp>(
-                           parallelOp->getParentOp()))
-      return iface.getAllocaBlock();
-  }
+  if (auto outlineableIface =
+          mlir::dyn_cast<mlir::omp::OutlineableOpenMPOpInterface>(op))
+    return outlineableIface.getAllocaBlock();
   if (auto recipeIface = mlir::dyn_cast<mlir::accomp::RecipeInterface>(op))
     return recipeIface.getAllocaBlock(*parentRegion);
   if (auto llvmFuncOp = mlir::dyn_cast<mlir::LLVM::LLVMFuncOp>(op))
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index d6eae5ec5fdde0..bdc2d9cd9c6c43 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -589,31 +589,8 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
     return {point};
   };
 
-  // Find the first OpenMP outlineable parent region while taking into account
-  // the possibility of finding an omp.parallel region that is taking a loop
-  // wrapper role. These operations must be skipped, as they cannot hold
-  // allocations.
-  const auto findOmpRegion = [](mlir::Operation *op) {
-    auto findOmpRegionImpl =
-        [](mlir::Operation *op,
-           auto &findOmpRegion) -> mlir::omp::OutlineableOpenMPOpInterface {
-      auto ompRegion =
-          op->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
-      if (!ompRegion)
-        return nullptr;
-
-      if (auto parallelOp =
-              mlir::dyn_cast_if_present<mlir::omp::ParallelOp>(*ompRegion)) {
-        mlir::Operation *parentOp = parallelOp->getParentOp();
-        if (mlir::isa_and_present<mlir::omp::DistributeOp>(parentOp))
-          return findOmpRegion(parentOp, findOmpRegion);
-      }
-      return ompRegion;
-    };
-    return findOmpRegionImpl(op, findOmpRegionImpl);
-  };
-
-  auto oldOmpRegion = findOmpRegion(oldAlloc);
+  auto oldOmpRegion =
+      oldAlloc->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
 
   // Find when the last operand value becomes available
   mlir::Block *operandsBlock = nullptr;
@@ -641,7 +618,8 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) {
   LLVM_DEBUG(llvm::dbgs() << "--Placing after last operand: " << *lastOperand
                           << "\n");
   // check we aren't moving out of an omp region
-  auto lastOpOmpRegion = findOmpRegion(lastOperand);
+  auto lastOpOmpRegion =
+      lastOperand->getParentOfType<mlir::omp::OutlineableOpenMPOpInterface>();
   if (lastOpOmpRegion == oldOmpRegion)
     return checkReturn(lastOperand);
   // Presumably this happened because the operands became ready before the
diff --git a/flang/test/Lower/OpenMP/eval-outside-target.f90 b/flang/test/Lower/OpenMP/eval-outside-target.f90
index 5d4a8a104c8952..ef578610e8e908 100644
--- a/flang/test/Lower/OpenMP/eval-outside-target.f90
+++ b/flang/test/Lower/OpenMP/eval-outside-target.f90
@@ -2,12 +2,11 @@
 !
RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 %s -o - | FileCheck %s --check-prefixes=BOTH,HOST ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE -! CHECK-LABEL: func.func @_QPteams +! BOTH-LABEL: func.func @_QPteams subroutine teams() ! BOTH: omp.target - ! HOST-SAME: num_teams({{.*}}) - ! HOST-SAME: teams_thread_limit({{.*}}) + ! HOST-SAME: num_teams({{.*}}) teams_thread_limit({{.*}}) ! DEVICE-NOT: num_teams({{.*}}) ! DEVICE-NOT: teams_thread_limit({{.*}}) @@ -20,8 +19,7 @@ subroutine teams() ! HOST-NOT: thread_limit({{.*}}) ! HOST-SAME: { - ! DEVICE-SAME: num_teams({{.*}}) - ! DEVICE-SAME: thread_limit({{.*}}) + ! DEVICE-SAME: num_teams({{.*}}) thread_limit({{.*}}) !$omp teams num_teams(1) thread_limit(2) call foo() !$omp end teams @@ -29,13 +27,13 @@ subroutine teams() !$omp end target ! BOTH: omp.teams - ! BOTH-SAME: num_teams({{.*}}) - ! BOTH-SAME: thread_limit({{.*}}) + ! BOTH-SAME: num_teams({{.*}}) thread_limit({{.*}}) !$omp teams num_teams(1) thread_limit(2) call foo() !$omp end teams end subroutine teams +! BOTH-LABEL: func.func @_QPparallel subroutine parallel() ! BOTH: omp.target @@ -76,6 +74,7 @@ subroutine parallel() !$omp end parallel end subroutine parallel +! BOTH-LABEL: func.func @_QPdistribute_parallel_do subroutine distribute_parallel_do() ! BOTH: omp.target @@ -87,14 +86,14 @@ subroutine distribute_parallel_do() ! BOTH: omp.teams !$omp target teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! HOST-NOT: num_threads({{.*}}) ! HOST-SAME: { - + ! DEVICE-SAME: num_threads({{.*}}) - + + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop !$omp distribute parallel do num_threads(1) do i=1,10 @@ -110,9 +109,9 @@ subroutine distribute_parallel_do() !$omp target teams call foo() - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop !$omp distribute parallel do num_threads(1) do i=1,10 @@ -124,9 +123,9 @@ subroutine distribute_parallel_do() ! BOTH: omp.teams !$omp teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop !$omp distribute parallel do num_threads(1) do i=1,10 @@ -136,6 +135,7 @@ subroutine distribute_parallel_do() !$omp end teams end subroutine distribute_parallel_do +! BOTH-LABEL: func.func @_QPdistribute_parallel_do_simd subroutine distribute_parallel_do_simd() ! BOTH: omp.target @@ -147,14 +147,14 @@ subroutine distribute_parallel_do_simd() ! BOTH: omp.teams !$omp target teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! HOST-NOT: num_threads({{.*}}) ! HOST-SAME: { ! DEVICE-SAME: num_threads({{.*}}) - + + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop ! BOTH-NEXT: omp.simd !$omp distribute parallel do simd num_threads(1) @@ -171,9 +171,9 @@ subroutine distribute_parallel_do_simd() !$omp target teams call foo() - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop ! BOTH-NEXT: omp.simd !$omp distribute parallel do simd num_threads(1) @@ -186,9 +186,9 @@ subroutine distribute_parallel_do_simd() ! BOTH: omp.teams !$omp teams - ! BOTH: omp.distribute - ! BOTH-NEXT: omp.parallel + ! BOTH: omp.parallel ! BOTH-SAME: num_threads({{.*}}) + ! BOTH: omp.distribute ! BOTH-NEXT: omp.wsloop ! 
BOTH-NEXT: omp.simd !$omp distribute parallel do simd num_threads(1) diff --git a/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir b/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir index 62f93efde9c643..61bc8cc60ecd22 100644 --- a/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir +++ b/flang/test/Lower/OpenMP/hlfir-to-fir-conv-omp.mlir @@ -17,8 +17,6 @@ func.func @_QPfoo() { // CHECK: omp.target omp.target map_entries(%map_info -> %arg1 : !fir.ref>) { ^bb0(%arg1: !fir.ref>): - - // CHECK: %[[TO_BOX_ALLOC:.*]] = fir.alloca !fir.box> {pinned} %c1_2 = arith.constant 1 : index %21 = fir.shape %c1_2 : (index) -> !fir.shape<1> @@ -30,10 +28,11 @@ func.func @_QPfoo() { %c1_3 = arith.constant 1 : i32 %c10 = arith.constant 10 : i32 - // CHECK: omp.distribute - omp.distribute { - // CHECK: omp.parallel - omp.parallel { + // CHECK: omp.parallel + omp.parallel { + // CHECK: %[[TO_BOX_ALLOC:.*]] = fir.alloca !fir.box> {pinned} + // CHECK: omp.distribute + omp.distribute { // CHECK: omp.wsloop omp.wsloop { // CHECK: omp.loop_nest diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index 402fbe62df58ad..402fd56b2fd816 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -19,10 +19,10 @@ program main ! ---------------------------------------------------------------------------- !$omp teams - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -36,12 +36,12 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -53,12 +53,12 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -70,12 +70,12 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -87,10 +87,10 @@ program main end do !$omp end distribute parallel do simd - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -111,10 +111,10 @@ program main ! ---------------------------------------------------------------------------- !$omp teams - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -125,12 +125,12 @@ program main end do !$omp end distribute parallel do - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! 
CHECK-SAME: { @@ -139,12 +139,12 @@ program main end do !$omp end distribute parallel do - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -823,10 +823,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -843,12 +843,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -863,12 +863,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -883,10 +883,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -903,10 +903,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -923,12 +923,12 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -946,10 +946,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -966,12 +966,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -986,12 +986,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1006,10 +1006,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! 
CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1026,10 +1026,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1046,12 +1046,12 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1066,10 +1066,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1314,10 +1314,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1331,12 +1331,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1348,12 +1348,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1365,10 +1365,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1382,12 +1382,12 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1402,10 +1402,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1422,12 +1422,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1442,12 +1442,12 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute - ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: { ! CHECK: omp.parallel ! 
CHECK-SAME: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1462,10 +1462,10 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop @@ -1482,11 +1482,11 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel - ! CHECK-SAME: if({{.*}}) ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1501,10 +1501,10 @@ program main ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.distribute + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.parallel + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop diff --git a/flang/test/Lower/OpenMP/loop-compound.f90 b/flang/test/Lower/OpenMP/loop-compound.f90 index 05d373d3dcdacb..cb614a0debdaf5 100644 --- a/flang/test/Lower/OpenMP/loop-compound.f90 +++ b/flang/test/Lower/OpenMP/loop-compound.f90 @@ -14,8 +14,8 @@ program main ! ---------------------------------------------------------------------------- !$omp teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest @@ -31,8 +31,8 @@ program main ! ---------------------------------------------------------------------------- !$omp teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.loop_nest !$omp distribute parallel do @@ -144,8 +144,8 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest @@ -159,8 +159,8 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute parallel do @@ -196,8 +196,8 @@ program main ! TEAMS DISTRIBUTE PARALLEL DO SIMD ! ---------------------------------------------------------------------------- ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! CHECK-NEXT: omp.simd ! CHECK-NEXT: omp.loop_nest @@ -210,8 +210,8 @@ program main ! TEAMS DISTRIBUTE PARALLEL DO ! ---------------------------------------------------------------------------- ! CHECK: omp.teams + ! CHECK: omp.parallel ! CHECK: omp.distribute - ! CHECK-NEXT: omp.parallel ! CHECK-NEXT: omp.wsloop ! 
CHECK-NEXT: omp.loop_nest
   !$omp teams distribute parallel do
 
diff --git a/flang/test/Lower/OpenMP/target_private.f90 b/flang/test/Lower/OpenMP/target_private.f90
index 52471206113ff6..e45d128f41db36 100644
--- a/flang/test/Lower/OpenMP/target_private.f90
+++ b/flang/test/Lower/OpenMP/target_private.f90
@@ -45,14 +45,14 @@ subroutine omp_target_target_do_simd()
 !CHECK: %[[IV:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "iv"}
 !CHECK: %[[VAR:.*]] = omp.map.info{{.*}}map_clauses(implicit{{.*}}{name = "var"}
 !CHECK: omp.target
-!CHECK-SAME: map_entries(%[[IV]] -> %{{.*}}, %[[VAR]] -> %{{.*}}
+!CHECK-SAME: map_entries(%[[IV]] -> %[[MAP_IV:.*]], %[[VAR]] -> %[[MAP_VAR:.*]] : !fir.ref<i64>, !fir.ref<f64>)
+!CHECK: %[[MAP_IV_DECL:.*]]:2 = hlfir.declare %[[MAP_IV]]
+!CHECK: %[[MAP_VAR_DECL:.*]]:2 = hlfir.declare %[[MAP_VAR]]
 !CHECK: omp.teams {
-!CHECK: %[[IV_PRIV:.*]] = fir.alloca i64 {bindc_name = "iv"
+!CHECK: omp.parallel private(@{{.*}} %[[MAP_IV_DECL]]#0 -> %[[IV_PRIV:.*]] : !fir.ref<i64>, @{{.*}} %[[MAP_VAR_DECL]]#0 -> %[[VAR_PRIV:.*]] : !fir.ref<f64>) {
 !CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %[[IV_PRIV]]
-!CHECK: %[[VAR_PRIV:.*]] = fir.alloca f64 {bindc_name = "var"
 !CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_PRIV]]
-!CHECK: omp.distribute {
-!CHECK-NEXT: omp.parallel {
+!CHECK: omp.distribute {
 !CHECK-NEXT: omp.wsloop {
 !CHECK-NEXT: omp.simd {
 !CHECK-NEXT: omp.loop_nest
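
To illustrate the representation change, here is a hand-written MLIR sketch of
the nesting before and after this patch. It is heavily simplified: SSA values
such as %lb, %ub, %step, the alloca type, and all clauses, operand types, and
entry block arguments are invented or elided, so this is not verifier-clean IR
copied from the Lit tests above.

```mlir
// Before: omp.parallel acted as one of the loop wrappers, nested under
// omp.distribute, so it could not hold allocations itself.
omp.teams {
  omp.distribute {
    omp.parallel {
      omp.wsloop {
        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
          // ... loop body ...
          omp.yield
        }
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  omp.terminator
}

// After: omp.parallel is hoisted above the wrapper nest. Temporaries created
// while lowering the loop body (e.g. "pinned" allocas) land in its entry
// block instead of outside the wrappers.
omp.teams {
  omp.parallel {
    %tmp = fir.alloca i32 {pinned}  // illustrative temporary allocation
    omp.distribute {
      omp.wsloop {
        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
          // ... loop body ...
          omp.yield
        }
        omp.terminator
      }
      omp.terminator
    }
    omp.terminator
  }
  omp.terminator
}
```

With `omp.parallel` at the top of the nest it is once again a valid alloca
block for everything created while lowering the loop body, which is why the
wrapper-detection special cases in FIRBuilder.cpp, FIROpPatterns.cpp, and
StackArrays.cpp can simply be deleted.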