diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 6f3e42510ac6aa..b1fb2d8431a530 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -24,11 +24,13 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include #include +#include #include namespace flangomp { @@ -36,7 +38,8 @@ namespace flangomp { #include "flang/Optimizer/OpenMP/Passes.h.inc" } // namespace flangomp -#define DEBUG_TYPE "fopenmp-do-concurrent-conversion" +#define DEBUG_TYPE "do-concurrent-conversion" +#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ") namespace Fortran { namespace lower { @@ -255,29 +258,59 @@ bool isIndVarUltimateOperand(mlir::Operation *op, fir::DoLoopOp doLoop) { return false; } +mlir::Value findLoopIndVar(fir::DoLoopOp doLoop) { + mlir::Value result = nullptr; + mlir::visitUsedValuesDefinedAbove( + doLoop.getRegion(), [&](mlir::OpOperand *operand) { + if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) + result = operand->get(); + }); + + assert(result != nullptr); + return result; +} + /// Collect the list of values used inside the loop but defined outside of it. /// The first item in the returned list is always the loop's induction /// variable. 
-void collectLoopLiveIns(fir::DoLoopOp doLoop, - llvm::SmallVectorImpl &liveIns) { +void collectLoopNestLiveIns( + LoopNestToIndVarMap &loopNest, llvm::SmallVectorImpl &liveIns, + llvm::DenseMap *liveInToName = nullptr) { llvm::SmallDenseSet seenValues; llvm::SmallDenseSet seenOps; - mlir::visitUsedValuesDefinedAbove( - doLoop.getRegion(), [&](mlir::OpOperand *operand) { - if (!seenValues.insert(operand->get()).second) - return; + auto addValueToLiveIns = [&](mlir::Value liveIn) { + if (!seenValues.insert(liveIn).second) + return false; - mlir::Operation *definingOp = operand->get().getDefiningOp(); - // We want to collect ops corresponding to live-ins only once. - if (definingOp && !seenOps.insert(definingOp).second) - return; + mlir::Operation *definingOp = liveIn.getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return false; - liveIns.push_back(operand->get()); + liveIns.push_back(liveIn); + return true; + }; - if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) - std::swap(*liveIns.begin(), *liveIns.rbegin()); - }); + size_t nestLevel = 0; + for (auto [loop, _] : loopNest) { + auto addBoundOrStepToLiveIns = [&](mlir::Value operand, std::string name) { + (*liveInToName)[operand] = name; + addValueToLiveIns(operand); + }; + + addBoundOrStepToLiveIns(loop.getLowerBound(), + "loop." + std::to_string(nestLevel) + ".lb"); + addBoundOrStepToLiveIns(loop.getUpperBound(), + "loop." + std::to_string(nestLevel) + ".ub"); + addBoundOrStepToLiveIns(loop.getStep(), + "loop." 
+ std::to_string(nestLevel) + ".step"); + ++nestLevel; + } + + mlir::visitUsedValuesDefinedAbove( + loopNest.front().first.getRegion(), + [&](mlir::OpOperand *operand) { addValueToLiveIns(operand->get()); }); } /// Collects the op(s) responsible for updating a loop's iteration variable with @@ -366,24 +399,96 @@ void collectIndirectConstOpChain(mlir::Operation *link, opChain.insert(link); } +/// Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff +/// there are no operations in \p outerLoop's region other than: +/// +/// 1. those operations needed to set up \p innerLoop's LB, UB, and step values, +/// 2. the operations needed to assign/update \p outerLoop's induction variable. +/// 3. \p innerLoop itself. +/// +/// \return true if \p innerLoop is perfectly nested inside \p outerLoop +/// according to the above definition. +bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) { + mlir::BackwardSliceOptions backwardSliceOptions; + backwardSliceOptions.inclusive = true; + // We will collect the backward slices for innerLoop's LB, UB, and step. + // However, we want to limit the scope of these slices to the scope of + // outerLoop's region. + backwardSliceOptions.filter = [&](mlir::Operation *op) { + return !mlir::areValuesDefinedAbove(op->getResults(), + outerLoop.getRegion()); + }; + + llvm::SetVector lbSlice; + mlir::getBackwardSlice(innerLoop.getLowerBound(), &lbSlice, + backwardSliceOptions); + + llvm::SetVector ubSlice; + mlir::getBackwardSlice(innerLoop.getUpperBound(), &ubSlice, + backwardSliceOptions); + + llvm::SetVector stepSlice; + mlir::getBackwardSlice(innerLoop.getStep(), &stepSlice, backwardSliceOptions); + + mlir::ForwardSliceOptions forwardSliceOptions; + forwardSliceOptions.inclusive = true; + // We don't care about the outer loop's induction variable's uses within the + // inner loop, so we filter out these uses. 
+ forwardSliceOptions.filter = [&](mlir::Operation *op) { + return mlir::areValuesDefinedAbove(op->getResults(), innerLoop.getRegion()); + }; + + llvm::SetVector indVarSlice; + mlir::getForwardSlice(outerLoop.getInductionVar(), &indVarSlice, + forwardSliceOptions); + + llvm::SetVector innerLoopSetupOpsVec; + innerLoopSetupOpsVec.set_union(indVarSlice); + innerLoopSetupOpsVec.set_union(lbSlice); + innerLoopSetupOpsVec.set_union(ubSlice); + innerLoopSetupOpsVec.set_union(stepSlice); + llvm::DenseSet innerLoopSetupOpsSet; + + for (mlir::Operation *op : innerLoopSetupOpsVec) + innerLoopSetupOpsSet.insert(op); + + llvm::DenseSet loopBodySet; + outerLoop.walk([&](mlir::Operation *op) { + if (op == outerLoop) + return mlir::WalkResult::advance(); + + if (op == innerLoop) + return mlir::WalkResult::skip(); + + if (op->hasTrait()) + return mlir::WalkResult::advance(); + + loopBodySet.insert(op); + return mlir::WalkResult::advance(); + }); + + bool result = (loopBodySet == innerLoopSetupOpsSet); + mlir::Location loc = outerLoop.getLoc(); + LLVM_DEBUG(DBGS() << "Loop pair starting at location " << loc << " is" + << (result ? "" : " not") << " perfectly nested\n"); + return result; +} + /// Starting with `outerLoop` collect a perfectly nested loop nest, if any. This /// function collects as much as possible loops in the nest; it case it fails to /// recognize a certain nested loop as part of the nest it just returns the /// parent loops it discovered before. 
-mlir::LogicalResult collectLoopNest(fir::DoLoopOp outerLoop, +mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop, LoopNestToIndVarMap &loopNest) { - assert(outerLoop.getUnordered()); - llvm::SmallVector outerLoopLiveIns; - collectLoopLiveIns(outerLoop, outerLoopLiveIns); - + assert(currentLoop.getUnordered()); while (true) { loopNest.try_emplace( - outerLoop, + currentLoop, InductionVariableInfo{ - outerLoopLiveIns.front().getDefiningOp(), - std::move(looputils::extractIndVarUpdateOps(outerLoop))}); + findLoopIndVar(currentLoop).getDefiningOp(), + std::move(looputils::extractIndVarUpdateOps(currentLoop))}); - auto directlyNestedLoops = outerLoop.getRegion().getOps(); + auto directlyNestedLoops = currentLoop.getRegion().getOps(); llvm::SmallVector unorderedLoops; for (auto nestedLoop : directlyNestedLoops) @@ -403,64 +508,10 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp outerLoop, (nestedUnorderedLoop.getStep().getDefiningOp() == nullptr)) return mlir::failure(); - llvm::SmallVector nestedLiveIns; - collectLoopLiveIns(nestedUnorderedLoop, nestedLiveIns); - - llvm::DenseSet outerLiveInsSet; - llvm::DenseSet nestedLiveInsSet; - - // Returns a "unified" view of an mlir::Value. This utility checks if the - // value is defined by an op, and if so, return the first value defined by - // that op (if there are many), otherwise just returns the value. - // - // This serves the purpose that if, for example, `%op_res#0` is used in the - // outer loop and `%op_res#1` is used in the nested loop (or vice versa), - // that we detect both as the same value. If we did not do so, we might - // falesely detect that the 2 loops are not perfectly nested since they use - // "different" sets of values. - auto getUnifiedLiveInView = [](mlir::Value liveIn) { - return liveIn.getDefiningOp() != nullptr - ? 
liveIn.getDefiningOp()->getResult(0) - : liveIn; - }; - - // Re-package both lists of live-ins into sets so that we can use set - // equality to compare the values used in the outerloop vs. the nestd one. - - for (auto liveIn : nestedLiveIns) - nestedLiveInsSet.insert(getUnifiedLiveInView(liveIn)); - - mlir::Value outerLoopIV; - for (auto liveIn : outerLoopLiveIns) { - outerLiveInsSet.insert(getUnifiedLiveInView(liveIn)); - - // Keep track of the IV of the outerloop. See `isPerfectlyNested` for more - // info on the reason. - if (outerLoopIV == nullptr) - outerLoopIV = getUnifiedLiveInView(liveIn); - } - - // For the 2 loops to be perfectly nested, either: - // * both would have exactly the same set of live-in values or, - // * the outer loop would have exactly 1 extra live-in value: the outer - // loop's induction variable; this happens when the outer loop's IV is - // *not* referenced in the nested loop. - bool isPerfectlyNested = [&]() { - if (outerLiveInsSet == nestedLiveInsSet) - return true; - - if ((outerLiveInsSet.size() == nestedLiveIns.size() + 1) && - !nestedLiveInsSet.contains(outerLoopIV)) - return true; - - return false; - }(); - - if (!isPerfectlyNested) + if (!isPerfectlyNested(currentLoop, nestedUnorderedLoop)) return mlir::failure(); - outerLoop = nestedUnorderedLoop; - outerLoopLiveIns = std::move(nestedLiveIns); + currentLoop = nestedUnorderedLoop; } return mlir::success(); @@ -634,10 +685,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "defining operation."); } - llvm::SmallVector outermostLoopLiveIns; - looputils::collectLoopLiveIns(doLoop, outermostLoopLiveIns); - assert(!outermostLoopLiveIns.empty()); - looputils::LoopNestToIndVarMap loopNest; bool hasRemainingNestedLoops = failed(looputils::collectLoopNest(doLoop, loopNest)); @@ -646,15 +693,57 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "Some `do concurent` loops are not perfectly-nested. 
" "These will be serialzied."); + llvm::DenseMap liveInToName; + llvm::SmallVector loopNestLiveIns; + + // TODO outline into a separate function. This hoists the ops to compute + // bounds of all loops in the entire loop nest outside the outermost loop. + // Without this hoisting, values/variables that are required to compute + // these bounds will be stuck inside the original `fir.do_loop` ops and + // therefore their SSA values won't be visible from within the `target` + // region. + { + fir::DoLoopOp outermostLoop = loopNest.front().first; + + mlir::BackwardSliceOptions backwardSliceOptions; + backwardSliceOptions.inclusive = true; + // We will collect the backward slices for each loop's LB, UB, and step. + // However, we want to limit the scope of these slices to the scope of + // outermostLoop's region. + backwardSliceOptions.filter = [&](mlir::Operation *op) { + return !mlir::areValuesDefinedAbove(op->getResults(), + outermostLoop.getRegion()); + }; + + for (auto [loop, _] : loopNest) { + auto moveBoundOrStepOutOfLoopNest = [&](mlir::Value operand) { + llvm::SetVector loopOperandSlice; + mlir::getBackwardSlice(operand, &loopOperandSlice, + backwardSliceOptions); + + for (mlir::Operation *sliceOp : loopOperandSlice) { + outermostLoop.moveOutOfLoop(sliceOp); + } + }; + + moveBoundOrStepOutOfLoopNest(loop.getLowerBound()); + moveBoundOrStepOutOfLoopNest(loop.getUpperBound()); + moveBoundOrStepOutOfLoopNest(loop.getStep()); + } + } + + looputils::collectLoopNestLiveIns(loopNest, loopNestLiveIns, &liveInToName); + assert(!loopNestLiveIns.empty()); + llvm::SetVector locals; looputils::collectLoopLocalValues(loopNest.back().first, locals); // We do not want to map "loop-local" values to the device through // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. 
- outermostLoopLiveIns.erase(llvm::remove_if(outermostLoopLiveIns, - [&](mlir::Value liveIn) { - return locals.contains(liveIn); - }), - outermostLoopLiveIns.end()); + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); looputils::sinkLoopIVArgs(rewriter, loopNest); @@ -669,12 +758,13 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // The outermost loop will contain all the live-in values in all nested // loops since live-in values are collected recursively for all nested // ops. - for (mlir::Value liveIn : outermostLoopLiveIns) + for (mlir::Value liveIn : loopNestLiveIns) { targetClauseOps.mapVars.push_back( - genMapInfoOpForLiveIn(rewriter, liveIn)); + genMapInfoOpForLiveIn(rewriter, liveIn, liveInToName)); + } - targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, - outermostLoopLiveIns, targetClauseOps); + targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps); genTeamsOp(doLoop.getLoc(), rewriter); } @@ -727,14 +817,14 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { private: void genBoundsOps(mlir::ConversionPatternRewriter &rewriter, - mlir::Location loc, hlfir::DeclareOp declareOp, + mlir::Location loc, mlir::Value shape, llvm::SmallVectorImpl &boundsOps) const { - if (declareOp.getShape() == nullptr) { + if (shape == nullptr) { return; } - auto shapeOp = mlir::dyn_cast_if_present( - declareOp.getShape().getDefiningOp()); + auto shapeOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); if (shapeOp == nullptr) TODO(loc, "Shapes not defined by shape op's are not supported yet."); @@ -759,15 +849,36 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { boundsOps.push_back(genBoundsOp(extent)); } - mlir::omp::MapInfoOp - genMapInfoOpForLiveIn(mlir::ConversionPatternRewriter &rewriter, - mlir::Value liveIn) const { - auto declareOp = - 
mlir::dyn_cast_if_present(liveIn.getDefiningOp()); + mlir::omp::MapInfoOp genMapInfoOpForLiveIn( + mlir::ConversionPatternRewriter &rewriter, mlir::Value liveIn, + const llvm::DenseMap &liveInToName) const { + mlir::Value rawAddr = liveIn; + mlir::Value shape = nullptr; + std::string name = ""; - if (declareOp == nullptr) - TODO(liveIn.getLoc(), - "Values not defined by declare op's are not supported yet."); + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. + rawAddr = declareOp.getOriginalBase(); + shape = declareOp.getShape(); + name = declareOp.getUniqName().str(); + } else if (liveInToName.contains(liveIn)) + name = liveInToName.at(liveIn); + + if (!llvm::isa(rawAddr.getType())) { + fir::FirOpBuilder builder( + rewriter, fir::getKindMapping( + liveInDefiningOp->getParentOfType())); + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } mlir::Type liveInType = liveIn.getType(); mlir::Type eleType = liveInType; @@ -787,15 +898,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } llvm::SmallVector boundsOps; - genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps); + genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps); - // Use the raw address to avoid unboxing `fir.box` values whenever possible. - // Put differently, if we have access to the direct value memory - // reference/address, we use it. 
- mlir::Value rawAddr = declareOp.getOriginalBase(); return Fortran::lower::omp::internal::createMapInfoOp( rewriter, liveIn.getLoc(), rawAddr, - /*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps, + /*varPtrPtr=*/{}, name, boundsOps, /*members=*/{}, /*membersIndex=*/mlir::ArrayAttr{}, static_cast< @@ -816,34 +923,40 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { llvm::SmallVector liveInTypes; llvm::SmallVector liveInLocs; - for (mlir::Value liveIn : liveIns) { - liveInTypes.push_back(liveIn.getType()); - liveInLocs.push_back(liveIn.getLoc()); + for (mlir::Value mapInfoOp : clauseOps.mapVars) { + auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); + liveInTypes.push_back(miOp.getVarPtr().getType()); + liveInLocs.push_back(miOp.getVarPtr().getLoc()); } rewriter.createBlock(&region, {}, liveInTypes, liveInLocs); + fir::FirOpBuilder firBuilder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType())); - for (auto [arg, mapInfoOp] : - llvm::zip_equal(region.getArguments(), clauseOps.mapVars)) { + for (auto [liveIn, arg, mapInfoOp] : + llvm::zip_equal(liveIns, region.getArguments(), clauseOps.mapVars)) { auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); - mlir::Value miOperand = miOp.getVariableOperand(0); - // TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // TODO If `liveIn.getDefiningOp()` is a `fir::BoxAddrOp`, we probably // need to "unpack" the box by getting the defining op of it's value. // However, we did not hit this case in reality yet so leaving it as a // todo for now. 
- mapper.map(miOperand, liveInDeclare.getOriginalBase()); + if (!llvm::isa(liveIn.getType())) + mapper.map(liveIn, + firBuilder.loadIfRef(liveIn.getLoc(), + liveInDeclare.getOriginalBase())); + else + mapper.map(liveIn, liveInDeclare.getOriginalBase()); if (auto origDeclareOp = mlir::dyn_cast_if_present( - miOperand.getDefiningOp())) + liveIn.getDefiningOp())) { mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); + } } - fir::FirOpBuilder firBuilder( - rewriter, - fir::getKindMapping(targetOp->getParentOfType())); Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, targetOp); rewriter.setInsertionPoint( @@ -924,24 +1037,31 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { looputils::collectIndirectConstOpChain(operation, opChain); mlir::Operation *result; - for (mlir::Operation *link : opChain) + for (mlir::Operation *link : opChain) { result = rewriter.clone(*link, mapper); + } return result; }; for (auto &[doLoop, _] : loopNest) { - mlir::Operation *lbOp = doLoop.getLowerBound().getDefiningOp(); - loopNestClauseOps.loopLowerBounds.push_back( - cloneBoundOrStepOpChain(lbOp)->getResult(0)); - - mlir::Operation *ubOp = doLoop.getUpperBound().getDefiningOp(); - loopNestClauseOps.loopUpperBounds.push_back( - cloneBoundOrStepOpChain(ubOp)->getResult(0)); - - mlir::Operation *stepOp = doLoop.getStep().getDefiningOp(); - loopNestClauseOps.loopSteps.push_back( - cloneBoundOrStepOpChain(stepOp)->getResult(0)); + auto addBoundsOrStep = + [&](mlir::Value value, + llvm::SmallVectorImpl &boundsOrStepVec) { + if (mapper.contains(value)) + boundsOrStepVec.push_back(mapper.lookup(value)); + else { + mlir::Operation *definingOp = value.getDefiningOp(); + boundsOrStepVec.push_back( + cloneBoundOrStepOpChain(definingOp)->getResult(0)); + } + }; + + addBoundsOrStep(doLoop.getLowerBound(), + loopNestClauseOps.loopLowerBounds); + addBoundsOrStep(doLoop.getUpperBound(), + loopNestClauseOps.loopUpperBounds); + 
addBoundsOrStep(doLoop.getStep(), loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index 1a486b4db8597c..11eaf60e43dd14 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -22,6 +22,11 @@ program do_concurrent_basic ! CHECK-NOT: fir.do_loop ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + + ! CHECK-DAG: %[[LB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.lb"} + ! CHECK-DAG: %[[UB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.ub"} + ! CHECK-DAG: %[[STEP_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.step"} + ! CHECK: %[[C0:.*]] = arith.constant 0 : index ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index @@ -33,29 +38,35 @@ program do_concurrent_basic ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) ! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 - ! CHECK: omp.target ! CHECK-SAME: trip_count(%[[TRIP_COUNT]] : i64) - ! CHECK-SAME: map_entries(%[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: map_entries(%[[LB_MAP_INFO]] -> %[[LB_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[UB_MAP_INFO]] -> %[[UB_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[STEP_MAP_INFO]] -> %[[STEP_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + ! CHECK: %[[LB_DEV_DECL:.*]]:2 = hlfir.declare %[[LB_ARG]] + ! CHECK: %[[LB_DEV_VAL:.*]] = fir.load %[[LB_DEV_DECL]]#1 + + ! CHECK: %[[UB_DEV_DECL:.*]]:2 = hlfir.declare %[[UB_ARG]] + ! CHECK: %[[UB_DEV_VAL:.*]] = fir.load %[[UB_DEV_DECL]]#1 + + ! CHECK: %[[STEP_DEV_DECL:.*]]:2 = hlfir.declare %[[STEP_ARG]] + ! CHECK: %[[STEP_DEV_VAL:.*]] = fir.load %[[STEP_DEV_DECL]]#1 + ! 
CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { ! CHECK-NEXT: omp.parallel { ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 - ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index - ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 - ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index - ! CHECK: %[[STEP:.*]] = arith.constant 1 : index - ! CHECK-NEXT: omp.distribute { ! CHECK-NEXT: omp.wsloop { - ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB_DEV_VAL]]) to (%[[UB_DEV_VAL]]) inclusive step (%[[STEP_DEV_VAL]]) { ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref @@ -76,6 +87,7 @@ program do_concurrent_basic ! CHECK-NEXT: } ! CHECK-NEXT: omp.terminator ! CHECK-NEXT: } + do concurrent (i=1:10) a(i) = i end do diff --git a/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 b/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 new file mode 100644 index 00000000000000..48d3b367874778 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 @@ -0,0 +1,89 @@ +! Tests loop-nest detection algorithm for do-concurrent mapping. + +! REQUIRES: asserts + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=host \ +! RUN: -mmlir -debug %s -o - 2> %t.log || true + +! RUN: FileCheck %s < %t.log + +program main + implicit none + +contains + +subroutine foo(n) + implicit none + integer :: n, m + integer :: i, j, k + integer :: x + integer, dimension(n) :: a + integer, dimension(n, n, n) :: b + + ! NOTE This for sure is a perfect loop nest. 
However, the way `do-concurrent` + ! loops are now emitted by flang is probably not correct. This is being looked + ! into at the moment and once we have flang emitting proper loop headers, we + ! will revisit this. + ! + ! CHECK: Loop pair starting at location + ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested + do concurrent(i=1:n, j=1:bar(n*m, n/m)) + a(i) = n + end do + + ! NOTE same as above. + ! + ! CHECK: Loop pair starting at location + ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested + do concurrent(i=bar(n, x):n, j=1:bar(n*m, n/m)) + a(i) = n + end do + + ! NOTE This is **not** a perfect nest since the inner call to `bar` will allocate + ! memory for the temp results of `n*m` and `n/m` **inside** the outer loop. + ! + ! CHECK: Loop pair starting at location + ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested + do concurrent(i=bar(n, x):n) + do concurrent(j=1:bar(n*m, n/m)) + a(i) = n + end do + end do + + ! CHECK: Loop pair starting at location + ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested + do concurrent(i=1:n) + x = 10 + do concurrent(j=1:m) + b(i,j,k) = i * j + k + end do + end do + + ! CHECK: Loop pair starting at location + ! CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is not perfectly nested + do concurrent(i=1:n) + do concurrent(j=1:m) + b(i,j,k) = i * j + k + end do + x = 10 + end do + + ! CHECK: Loop pair starting at location + ! 
CHECK: loc("{{.*}}":[[# @LINE + 1]]:{{.*}}) is perfectly nested + do concurrent(i=1:n) + do concurrent(j=1:m) + b(i,j,k) = i * j + k + x = 10 + end do + end do +end subroutine + +pure function bar(n, m) + implicit none + integer, intent(in) :: n, m + integer :: bar + + bar = n + m +end function + +end program main diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index 17cf27a9b70b27..18758cfc5efcdd 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -20,6 +20,9 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/partially_nested.f90 -o - \ ! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/dummy_arg_loop_bounds.f90 -o - \ +! RUN: | FileCheck %s --check-prefixes=DUMMY_UBS + !--- multi_range.f90 program main integer, parameter :: n = 10 @@ -76,32 +79,14 @@ program main ! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} ! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} -! COMMON: %[[C1_1:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_I:.*]] = fir.convert %[[C1_1]] : (i32) -> index -! COMMON: %[[C10:.*]] = arith.constant 10 : i32 -! COMMON: %[[UB_I:.*]] = fir.convert %[[C10]] : (i32) -> index -! COMMON: %[[STEP_I:.*]] = arith.constant 1 : index - -! COMMON: %[[C1_2:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_J:.*]] = fir.convert %[[C1_2]] : (i32) -> index -! COMMON: %[[C20:.*]] = arith.constant 20 : i32 -! COMMON: %[[UB_J:.*]] = fir.convert %[[C20]] : (i32) -> index -! COMMON: %[[STEP_J:.*]] = arith.constant 1 : index - -! COMMON: %[[C1_3:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_K:.*]] = fir.convert %[[C1_3]] : (i32) -> index -! COMMON: %[[C30:.*]] = arith.constant 30 : i32 -! 
COMMON: %[[UB_K:.*]] = fir.convert %[[C30]] : (i32) -> index -! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index - ! DEVICE: omp.distribute ! COMMON: omp.wsloop { ! COMMON-NEXT: omp.loop_nest ! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) -! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) -! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive -! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { +! COMMON-SAME: : index = (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) +! COMMON-SAME: to (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) inclusive +! COMMON-SAME: step (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) { ! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] ! COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#1 @@ -119,3 +104,52 @@ program main ! HOST-NEXT: omp.terminator ! HOST-NEXT: } + +!--- dummy_arg_loop_bounds.f90 + +subroutine foo(n, m) + implicit none + integer :: n, m + integer :: i, j + integer :: a(n, m) + + do concurrent(i=1:n, j=1:m) + a(i,j) = i * j + end do +end subroutine + +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.lb"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.ub"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.step"} + +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.lb"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.ub"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.step"} + + +! DUMMY_UBS: omp.target {{.*}} { + +! DUMMY_UBS-DAG: %[[LOOP0_LB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.lb"} +! DUMMY_UBS-DAG: %[[LOOP0_UB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.ub"} +! DUMMY_UBS-DAG: %[[LOOP0_STEP_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.step"} + +! DUMMY_UBS-DAG: %[[LOOP1_LB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.lb"} +! 
DUMMY_UBS-DAG: %[[LOOP1_UB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.ub"} +! DUMMY_UBS-DAG: %[[LOOP1_STEP_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.step"} + +! DUMMY_UBS-DAG: %[[LOOP0_LB:.*]] = fir.load %[[LOOP0_LB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP0_UB:.*]] = fir.load %[[LOOP0_UB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP0_STEP:.*]] = fir.load %[[LOOP0_STEP_DECL]]#1 + +! DUMMY_UBS-DAG: %[[LOOP1_LB:.*]] = fir.load %[[LOOP1_LB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP1_UB:.*]] = fir.load %[[LOOP1_UB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP1_STEP:.*]] = fir.load %[[LOOP1_STEP_DECL]]#1 + +! DUMMY_UBS: omp.loop_nest (%{{.*}}, %{{.*}}) : index +! DUMMY_UBS-SAME: = (%[[LOOP0_LB]], %[[LOOP1_LB]]) +! DUMMY_UBS-SAME: to (%[[LOOP0_UB]], %[[LOOP1_UB]]) +! DUMMY_UBS-SAME: inclusive step (%[[LOOP0_STEP]], %[[LOOP1_STEP]]) + +! DUMMY_UBS: omp.terminator +! DUMMY_UBS: } + diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 index 0dc2400a8863df..0b5c0dcc5960c6 100644 --- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 @@ -29,12 +29,15 @@ program main ! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"} ! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] -! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^:]+]]: -! DEVICE-SAME: !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { +! DEVICE: omp.target {{.*}}map_entries( +! DEVICE-SAME: %{{[^[:space:]]+}} -> {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> {{[^[:space:]]+}}, +! 
DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^[:space:]]+]] : {{.*}}) ! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 index 5420ff4586be60..69ad78822b975b 100644 --- a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -26,10 +26,13 @@ subroutine foo(n) ! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) ! CHECK: omp.target -! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], +! CHECK-SAME: map_entries(%{{[^[:space:]]+}} -> %[[LB_ARG:arg[0-9]*]], +! CHECK-SAME: %{{[^[:space:]]+}} -> %[[UB_ARG:arg[0-9]*]], +! CHECK-SAME: %{{[^[:space:]]+}} -> %[[STEP_ARG:arg[0-9]*]], +! CHECK-SAME: %[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], ! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], ! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) -! CHECK-SAME: {{.*}} { +! CHECK-SAME: { ! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] ! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 index 93dbd5a80040e5..0eff136d75b97e 100644 --- a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 @@ -29,11 +29,14 @@ program main ! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}} ! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]] -! 
DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]], -! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^:]+]]: -! DEVICE-SAME: !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { +! DEVICE: omp.target {{.*}}map_entries( +! DEVICE-SAME: %{{[^[:space:]]+}} -> {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^[:space:]]+]], +! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^[:space:]]+]] : {{.*}}) { ! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"}