diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index 75a0c4d52f1d007..9e86ab2f9de068a 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -24,11 +24,13 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include #include +#include #include namespace flangomp { @@ -249,29 +251,59 @@ bool isIndVarUltimateOperand(mlir::Operation *op, fir::DoLoopOp doLoop) { return false; }; +mlir::Value findLoopIndVar(fir::DoLoopOp doLoop) { + mlir::Value result = nullptr; + mlir::visitUsedValuesDefinedAbove( + doLoop.getRegion(), [&](mlir::OpOperand *operand) { + if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) + result = operand->get(); + }); + + assert(result != nullptr); + return result; +} + /// Collect the list of values used inside the loop but defined outside of it. /// The first item in the returned list is always the loop's induction /// variable. -void collectLoopLiveIns(fir::DoLoopOp doLoop, - llvm::SmallVectorImpl &liveIns) { +void collectLoopNestLiveIns( + LoopNestToIndVarMap &loopNest, llvm::SmallVectorImpl &liveIns, + llvm::DenseMap *liveInToName = nullptr) { llvm::SmallDenseSet seenValues; llvm::SmallDenseSet seenOps; - mlir::visitUsedValuesDefinedAbove( - doLoop.getRegion(), [&](mlir::OpOperand *operand) { - if (!seenValues.insert(operand->get()).second) - return; + auto addValueToLiveIns = [&](mlir::Value liveIn) { + if (!seenValues.insert(liveIn).second) + return false; - mlir::Operation *definingOp = operand->get().getDefiningOp(); - // We want to collect ops corresponding to live-ins only once. 
- if (definingOp && !seenOps.insert(definingOp).second) - return; + mlir::Operation *definingOp = liveIn.getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return false; - liveIns.push_back(operand->get()); + liveIns.push_back(liveIn); + return true; + }; - if (isIndVarUltimateOperand(operand->getOwner(), doLoop)) - std::swap(*liveIns.begin(), *liveIns.rbegin()); - }); + size_t nestLevel = 0; + for (auto [loop, _] : loopNest) { + auto addBoundOrStepToLiveIns = [&](mlir::Value operand, std::string name) { + (*liveInToName)[operand] = name; + addValueToLiveIns(operand); + }; + + addBoundOrStepToLiveIns(loop.getLowerBound(), + "loop." + std::to_string(nestLevel) + ".lb"); + addBoundOrStepToLiveIns(loop.getUpperBound(), + "loop." + std::to_string(nestLevel) + ".ub"); + addBoundOrStepToLiveIns(loop.getStep(), + "loop." + std::to_string(nestLevel) + ".step"); + ++nestLevel; + } + + mlir::visitUsedValuesDefinedAbove( + loopNest.front().first.getRegion(), + [&](mlir::OpOperand *operand) { addValueToLiveIns(operand->get()); }); } /// Collects the op(s) responsible for updating a loop's iteration variable with @@ -439,20 +471,17 @@ bool isPerfectlyNested(fir::DoLoopOp outerLoop, fir::DoLoopOp innerLoop) { /// function collects as much as possible loops in the nest; it case it fails to /// recognize a certain nested loop as part of the nest it just returns the /// parent loops it discovered before. 
-mlir::LogicalResult collectLoopNest(fir::DoLoopOp outerLoop, +mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop, LoopNestToIndVarMap &loopNest) { - assert(outerLoop.getUnordered()); - llvm::SmallVector outerLoopLiveIns; - collectLoopLiveIns(outerLoop, outerLoopLiveIns); - + assert(currentLoop.getUnordered()); while (true) { loopNest.try_emplace( - outerLoop, + currentLoop, InductionVariableInfo{ - outerLoopLiveIns.front().getDefiningOp(), - std::move(looputils::extractIndVarUpdateOps(outerLoop))}); + findLoopIndVar(currentLoop).getDefiningOp(), + std::move(looputils::extractIndVarUpdateOps(currentLoop))}); - auto directlyNestedLoops = outerLoop.getRegion().getOps(); + auto directlyNestedLoops = currentLoop.getRegion().getOps(); llvm::SmallVector unorderedLoops; for (auto nestedLoop : directlyNestedLoops) @@ -472,14 +501,10 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp outerLoop, (nestedUnorderedLoop.getStep().getDefiningOp() == nullptr)) return mlir::failure(); - llvm::SmallVector nestedLiveIns; - collectLoopLiveIns(nestedUnorderedLoop, nestedLiveIns); - - if (!isPerfectlyNested(outerLoop, nestedUnorderedLoop)) + if (!isPerfectlyNested(currentLoop, nestedUnorderedLoop)) return mlir::failure(); - outerLoop = nestedUnorderedLoop; - outerLoopLiveIns = std::move(nestedLiveIns); + currentLoop = nestedUnorderedLoop; } return mlir::success(); @@ -653,10 +678,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "defining operation."); } - llvm::SmallVector outermostLoopLiveIns; - looputils::collectLoopLiveIns(doLoop, outermostLoopLiveIns); - assert(!outermostLoopLiveIns.empty()); - looputils::LoopNestToIndVarMap loopNest; bool hasRemainingNestedLoops = failed(looputils::collectLoopNest(doLoop, loopNest)); @@ -665,15 +686,57 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "Some `do concurent` loops are not perfectly-nested. 
" "These will be serialzied."); + llvm::DenseMap liveInToName; + llvm::SmallVector loopNestLiveIns; + + // TODO outline into a separate function. This hoists the ops to compute + // bounds of all loops in the entire loop nest outside the outermost loop. + // Without this hoisting, values/variables that are required to compute + // these bounds will be stuck inside the original `fir.do_loop` ops and + // therefore their SSA values won't be visible from within the `target` + // region. + { + fir::DoLoopOp outermostLoop = loopNest.front().first; + + mlir::BackwardSliceOptions backwardSliceOptions; + backwardSliceOptions.inclusive = true; + // We will collect the backward slices for each loop's LB, UB, and step. + // However, we want to limit the scope of these slices to the scope of + // outermostLoop's region. + backwardSliceOptions.filter = [&](mlir::Operation *op) { + return !mlir::areValuesDefinedAbove(op->getResults(), + outermostLoop.getRegion()); + }; + + for (auto [loop, _] : loopNest) { + auto moveBoundOrStepOutOfLoopNest = [&](mlir::Value operand) { + llvm::SetVector loopOperandSlice; + mlir::getBackwardSlice(operand, &loopOperandSlice, + backwardSliceOptions); + + for (mlir::Operation *sliceOp : loopOperandSlice) { + outermostLoop.moveOutOfLoop(sliceOp); + } + }; + + moveBoundOrStepOutOfLoopNest(loop.getLowerBound()); + moveBoundOrStepOutOfLoopNest(loop.getUpperBound()); + moveBoundOrStepOutOfLoopNest(loop.getStep()); + } + } + + looputils::collectLoopNestLiveIns(loopNest, loopNestLiveIns, &liveInToName); + assert(!loopNestLiveIns.empty()); + llvm::SetVector locals; looputils::collectLoopLocalValues(loopNest.back().first, locals); // We do not want to map "loop-local" values to the device through // `omp.map.info` ops. Therefore, we remove them from the list of live-ins. 
- outermostLoopLiveIns.erase(llvm::remove_if(outermostLoopLiveIns, - [&](mlir::Value liveIn) { - return locals.contains(liveIn); - }), - outermostLoopLiveIns.end()); + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); looputils::sinkLoopIVArgs(rewriter, loopNest); @@ -688,12 +751,13 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // The outermost loop will contain all the live-in values in all nested // loops since live-in values are collected recursively for all nested // ops. - for (mlir::Value liveIn : outermostLoopLiveIns) + for (mlir::Value liveIn : loopNestLiveIns) { targetClauseOps.mapVars.push_back( - genMapInfoOpForLiveIn(rewriter, liveIn)); + genMapInfoOpForLiveIn(rewriter, liveIn, liveInToName)); + } - targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, - outermostLoopLiveIns, targetClauseOps); + targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps); genTeamsOp(doLoop.getLoc(), rewriter); } @@ -746,14 +810,14 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { private: void genBoundsOps(mlir::ConversionPatternRewriter &rewriter, - mlir::Location loc, hlfir::DeclareOp declareOp, + mlir::Location loc, mlir::Value shape, llvm::SmallVectorImpl &boundsOps) const { - if (declareOp.getShape() == nullptr) { + if (shape == nullptr) { return; } - auto shapeOp = mlir::dyn_cast_if_present( - declareOp.getShape().getDefiningOp()); + auto shapeOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); if (shapeOp == nullptr) TODO(loc, "Shapes not defined by shape op's are not supported yet."); @@ -778,15 +842,36 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { boundsOps.push_back(genBoundsOp(extent)); } - mlir::omp::MapInfoOp - genMapInfoOpForLiveIn(mlir::ConversionPatternRewriter &rewriter, - mlir::Value liveIn) const { - auto declareOp = - 
mlir::dyn_cast_if_present(liveIn.getDefiningOp()); + mlir::omp::MapInfoOp genMapInfoOpForLiveIn( + mlir::ConversionPatternRewriter &rewriter, mlir::Value liveIn, + const llvm::DenseMap &liveInToName) const { + mlir::Value rawAddr = liveIn; + mlir::Value shape = nullptr; + std::string name = ""; - if (declareOp == nullptr) - TODO(liveIn.getLoc(), - "Values not defined by declare op's are not supported yet."); + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. + rawAddr = declareOp.getOriginalBase(); + shape = declareOp.getShape(); + name = declareOp.getUniqName().str(); + } else if (liveInToName.contains(liveIn)) + name = liveInToName.at(liveIn); + + if (!llvm::isa(rawAddr.getType())) { + fir::FirOpBuilder builder( + rewriter, fir::getKindMapping( + liveInDefiningOp->getParentOfType())); + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } mlir::Type liveInType = liveIn.getType(); mlir::Type eleType = liveInType; @@ -806,15 +891,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } llvm::SmallVector boundsOps; - genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps); + genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps); - // Use the raw address to avoid unboxing `fir.box` values whenever possible. - // Put differently, if we have access to the direct value memory - // reference/address, we use it. 
- mlir::Value rawAddr = declareOp.getOriginalBase(); return Fortran::lower::omp ::internal::createMapInfoOp( rewriter, liveIn.getLoc(), rawAddr, - /*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps, + /*varPtrPtr=*/{}, name, boundsOps, /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{}, static_cast< @@ -835,34 +916,40 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { llvm::SmallVector liveInTypes; llvm::SmallVector liveInLocs; - for (mlir::Value liveIn : liveIns) { - liveInTypes.push_back(liveIn.getType()); - liveInLocs.push_back(liveIn.getLoc()); + for (mlir::Value mapInfoOp : clauseOps.mapVars) { + auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); + liveInTypes.push_back(miOp.getVarPtr().getType()); + liveInLocs.push_back(miOp.getVarPtr().getLoc()); } rewriter.createBlock(&region, {}, liveInTypes, liveInLocs); + fir::FirOpBuilder firBuilder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType())); - for (auto [arg, mapInfoOp] : - llvm::zip_equal(region.getArguments(), clauseOps.mapVars)) { + for (auto [liveIn, arg, mapInfoOp] : + llvm::zip_equal(liveIns, region.getArguments(), clauseOps.mapVars)) { auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); - mlir::Value miOperand = miOp.getVariableOperand(0); - // TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // TODO If `liveIn.getDefiningOp()` is a `fir::BoxAddrOp`, we probably // need to "unpack" the box by getting the defining op of it's value. // However, we did not hit this case in reality yet so leaving it as a // todo for now. 
- mapper.map(miOperand, liveInDeclare.getOriginalBase()); + if (!llvm::isa(liveIn.getType())) + mapper.map(liveIn, + firBuilder.loadIfRef(liveIn.getLoc(), + liveInDeclare.getOriginalBase())); + else + mapper.map(liveIn, liveInDeclare.getOriginalBase()); if (auto origDeclareOp = mlir::dyn_cast_if_present( - miOperand.getDefiningOp())) + liveIn.getDefiningOp())) { mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); + } } - fir::FirOpBuilder firBuilder( - rewriter, - fir::getKindMapping(targetOp->getParentOfType())); Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, targetOp); rewriter.setInsertionPoint( @@ -943,24 +1030,31 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { looputils::collectIndirectConstOpChain(operation, opChain); mlir::Operation *result; - for (mlir::Operation *link : opChain) + for (mlir::Operation *link : opChain) { result = rewriter.clone(*link, mapper); + } return result; }; for (auto &[doLoop, _] : loopNest) { - mlir::Operation *lbOp = doLoop.getLowerBound().getDefiningOp(); - loopNestClauseOps.loopLowerBounds.push_back( - cloneBoundOrStepOpChain(lbOp)->getResult(0)); - - mlir::Operation *ubOp = doLoop.getUpperBound().getDefiningOp(); - loopNestClauseOps.loopUpperBounds.push_back( - cloneBoundOrStepOpChain(ubOp)->getResult(0)); - - mlir::Operation *stepOp = doLoop.getStep().getDefiningOp(); - loopNestClauseOps.loopSteps.push_back( - cloneBoundOrStepOpChain(stepOp)->getResult(0)); + auto addBoundsOrStep = + [&](mlir::Value value, + llvm::SmallVectorImpl &boundsOrStepVec) { + if (mapper.contains(value)) + boundsOrStepVec.push_back(mapper.lookup(value)); + else { + mlir::Operation *definingOp = value.getDefiningOp(); + boundsOrStepVec.push_back( + cloneBoundOrStepOpChain(definingOp)->getResult(0)); + } + }; + + addBoundsOrStep(doLoop.getLowerBound(), + loopNestClauseOps.loopLowerBounds); + addBoundsOrStep(doLoop.getUpperBound(), + loopNestClauseOps.loopUpperBounds); + 
addBoundsOrStep(doLoop.getStep(), loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index 2f762c003ddf166..a8b70e67839975a 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -22,6 +22,11 @@ program do_concurrent_basic ! CHECK-NOT: fir.do_loop ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + + ! CHECK-DAG: %[[LB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.lb"} + ! CHECK-DAG: %[[UB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.ub"} + ! CHECK-DAG: %[[STEP_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.step"} + ! CHECK: %[[C0:.*]] = arith.constant 0 : index ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index @@ -33,31 +38,40 @@ program do_concurrent_basic ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) ! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 - ! CHECK: omp.target - ! CHECK-SAME: map_entries(%[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: map_entries(%[[LB_MAP_INFO]] -> %[[LB_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[UB_MAP_INFO]] -> %[[UB_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[STEP_MAP_INFO]] -> %[[STEP_ARG:.[[:alnum:]]+]], + ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] ! CHECK-SAME: trip_count(%[[TRIP_COUNT]] : i64) - ! CHECK-NEXT: ^{{.*}}(%[[I_ARG]]: !fir.ref, %[[A_ARG]]: !fir.ref>): + ! CHECK-NEXT: ^{{.*}}(%[[LB_ARG]]: !fir.ref, + ! CHECK-SAME: %[[UB_ARG]]: !fir.ref, %[[STEP_ARG]]: !fir.ref, + ! CHECK-SAME: %[[I_ARG]]: !fir.ref, + ! CHECK-SAME: %[[A_ARG]]: !fir.ref>, %[[A_EXT_ARG]]: !fir.ref): + + ! CHECK: %[[LB_DEV_DECL:.*]]:2 = hlfir.declare %[[LB_ARG]] + ! 
CHECK: %[[LB_DEV_VAL:.*]] = fir.load %[[LB_DEV_DECL]]#1 + + ! CHECK: %[[UB_DEV_DECL:.*]]:2 = hlfir.declare %[[UB_ARG]] + ! CHECK: %[[UB_DEV_VAL:.*]] = fir.load %[[UB_DEV_DECL]]#1 + + ! CHECK: %[[STEP_DEV_DECL:.*]]:2 = hlfir.declare %[[STEP_ARG]] + ! CHECK: %[[STEP_DEV_VAL:.*]] = fir.load %[[STEP_DEV_DECL]]#1 ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { ! CHECK-NEXT: omp.parallel { ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 - ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index - ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 - ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index - ! CHECK: %[[STEP:.*]] = arith.constant 1 : index - ! CHECK-NEXT: omp.distribute { ! CHECK-NEXT: omp.wsloop { - ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB_DEV_VAL]]) to (%[[UB_DEV_VAL]]) inclusive step (%[[STEP_DEV_VAL]]) { ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref @@ -78,6 +92,7 @@ program do_concurrent_basic ! CHECK-NEXT: } ! CHECK-NEXT: omp.terminator ! CHECK-NEXT: } + do concurrent (i=1:10) a(i) = i end do diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index 17cf27a9b70b27e..18758cfc5efcddb 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -20,6 +20,9 @@ ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/partially_nested.f90 -o - \ ! 
RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/dummy_arg_loop_bounds.f90 -o - \ +! RUN: | FileCheck %s --check-prefixes=DUMMY_UBS + !--- multi_range.f90 program main integer, parameter :: n = 10 @@ -76,32 +79,14 @@ program main ! COMMON-NEXT: %[[ITER_VAR_K:.*]] = fir.alloca i32 {bindc_name = "k"} ! COMMON-NEXT: %[[BINDING_K:.*]]:2 = hlfir.declare %[[ITER_VAR_K]] {uniq_name = "_QFEk"} -! COMMON: %[[C1_1:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_I:.*]] = fir.convert %[[C1_1]] : (i32) -> index -! COMMON: %[[C10:.*]] = arith.constant 10 : i32 -! COMMON: %[[UB_I:.*]] = fir.convert %[[C10]] : (i32) -> index -! COMMON: %[[STEP_I:.*]] = arith.constant 1 : index - -! COMMON: %[[C1_2:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_J:.*]] = fir.convert %[[C1_2]] : (i32) -> index -! COMMON: %[[C20:.*]] = arith.constant 20 : i32 -! COMMON: %[[UB_J:.*]] = fir.convert %[[C20]] : (i32) -> index -! COMMON: %[[STEP_J:.*]] = arith.constant 1 : index - -! COMMON: %[[C1_3:.*]] = arith.constant 1 : i32 -! COMMON: %[[LB_K:.*]] = fir.convert %[[C1_3]] : (i32) -> index -! COMMON: %[[C30:.*]] = arith.constant 30 : i32 -! COMMON: %[[UB_K:.*]] = fir.convert %[[C30]] : (i32) -> index -! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index - ! DEVICE: omp.distribute ! COMMON: omp.wsloop { ! COMMON-NEXT: omp.loop_nest ! COMMON-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) -! COMMON-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) -! COMMON-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive -! COMMON-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { +! COMMON-SAME: : index = (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) +! COMMON-SAME: to (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) inclusive +! COMMON-SAME: step (%{{[^[:space:]]+}}, %{{[^[:space:]]+}}, %{{[^[:space:]]+}}) { ! COMMON-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] ! 
COMMON-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#1 @@ -119,3 +104,52 @@ program main ! HOST-NEXT: omp.terminator ! HOST-NEXT: } + +!--- dummy_arg_loop_bounds.f90 + +subroutine foo(n, m) + implicit none + integer :: n, m + integer :: i, j + integer :: a(n, m) + + do concurrent(i=1:n, j=1:m) + a(i,j) = i * j + end do +end subroutine + +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.lb"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.ub"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.0.step"} + +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.lb"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.ub"} +! DUMMY_UBS-DAG: omp.map.info {{.*}} {name = "loop.1.step"} + + +! DUMMY_UBS: omp.target {{.*}} { + +! DUMMY_UBS-DAG: %[[LOOP0_LB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.lb"} +! DUMMY_UBS-DAG: %[[LOOP0_UB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.ub"} +! DUMMY_UBS-DAG: %[[LOOP0_STEP_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.0.step"} + +! DUMMY_UBS-DAG: %[[LOOP1_LB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.lb"} +! DUMMY_UBS-DAG: %[[LOOP1_UB_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.ub"} +! DUMMY_UBS-DAG: %[[LOOP1_STEP_DECL:.*]]:2 = hlfir.declare %arg{{.*}} {uniq_name = "loop.1.step"} + +! DUMMY_UBS-DAG: %[[LOOP0_LB:.*]] = fir.load %[[LOOP0_LB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP0_UB:.*]] = fir.load %[[LOOP0_UB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP0_STEP:.*]] = fir.load %[[LOOP0_STEP_DECL]]#1 + +! DUMMY_UBS-DAG: %[[LOOP1_LB:.*]] = fir.load %[[LOOP1_LB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP1_UB:.*]] = fir.load %[[LOOP1_UB_DECL]]#1 +! DUMMY_UBS-DAG: %[[LOOP1_STEP:.*]] = fir.load %[[LOOP1_STEP_DECL]]#1 + +! DUMMY_UBS: omp.loop_nest (%{{.*}}, %{{.*}}) : index +! DUMMY_UBS-SAME: = (%[[LOOP0_LB]], %[[LOOP1_LB]]) +! DUMMY_UBS-SAME: to (%[[LOOP0_UB]], %[[LOOP1_UB]]) +! DUMMY_UBS-SAME: inclusive step (%[[LOOP0_STEP]], %[[LOOP1_STEP]]) + +! DUMMY_UBS: omp.terminator +! 
DUMMY_UBS: } + diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 index f3f2e78f5b31836..9b0c9fa5b25179e 100644 --- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 @@ -31,9 +31,14 @@ program main ! DEVICE: omp.target -! DEVICE: ^bb0(%[[I_ARG:[^[:space:]]+]]: !fir.ref, %[[X_ARG:[^[:space:]]+]]: !fir.ref, +! DEVICE: ^bb0( +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %[[I_ARG:[^[:space:]]+]]: !fir.ref, +! DEVICE-SAME: %[[X_ARG:[^[:space:]]+]]: !fir.ref, ! DEVICE-SAME: %[[J_ARG:[^[:space:]]+]]: !fir.ref, %[[K_ARG:[^[:space:]]+]]: !fir.ref, -! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref>): +! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref> ! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"} diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 index 5420ff4586be605..5be66261b484433 100644 --- a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -26,7 +26,10 @@ subroutine foo(n) ! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) ! CHECK: omp.target -! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], +! CHECK-SAME: map_entries(%[[LOOP_LB_MAP]] -> %[[LB_ARG:arg[0-9]*]], +! CHECK-SAME: %[[LOOP_UB_MAP]] -> %[[UB_ARG:arg[0-9]*]], +! CHECK-SAME: %[[LOOP_STEP_MAP]] -> %[[STEP_ARG:arg[0-9]*]], +! CHECK-SAME: %[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], ! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], ! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) ! 
CHECK-SAME: {{.*}} { diff --git a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 index 429500cead1073b..49174fd6d406106 100644 --- a/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 +++ b/flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90 @@ -31,9 +31,13 @@ program main ! DEVICE: omp.target -! DEVICE: ^bb0(%[[I_ARG:[^[:space:]]+]]: !fir.ref, +! DEVICE: ^bb0( +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %{{[^[:space:]]+}}: {{[^[:space:]]+}}, +! DEVICE-SAME: %[[I_ARG:[^[:space:]]+]]: !fir.ref, ! DEVICE-SAME: %[[J_ARG:[^[:space:]]+]]: !fir.ref, %[[K_ARG:[^[:space:]]+]]: !fir.ref, -! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref>): +! DEVICE-SAME: %[[A_ARG:[^[:space:]]+]]: !fir.ref> ! DEVICE: %[[TARGET_J_DECL:.*]]:2 = hlfir.declare %[[J_ARG]] {uniq_name = "_QFEj"} ! DEVICE: %[[TARGET_K_DECL:.*]]:2 = hlfir.declare %[[K_ARG]] {uniq_name = "_QFEk"}