diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index b6a63f802647f5..66e2508bcca500 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1320,9 +1320,11 @@ static void genBodyOfTargetOp( while (!valuesDefinedAbove.empty()) { for (mlir::Value val : valuesDefinedAbove) { mlir::Operation *valOp = val.getDefiningOp(); + assert(valOp != nullptr); if (mlir::isMemoryEffectFree(valOp)) { mlir::Operation *clonedOp = valOp->clone(); regionBlock->push_front(clonedOp); + assert(clonedOp->getNumResults() == 1); val.replaceUsesWithIf( clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == regionBlock; diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index 7fe27c947e549d..b244173c8aaeee 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -147,6 +147,68 @@ mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, return tripCount; } + +/// Check if cloning the bounds introduced any dependency on the outer region. +/// If so, then either clone them as well if they are MemoryEffectFree, or else +/// copy them to a new temporary and add them to the map and block_argument +/// lists and replace their uses with the new temporary. +/// +/// TODO: similar to the above functions, this is copied from OpenMP lowering +/// (in this case, from `genBodyOfTargetOp`). Once we move to a common lib for +/// these utils this will move as well. +void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder, + mlir::omp::TargetOp targetOp) { + mlir::Region &targetRegion = targetOp.getRegion(); + mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front(); + llvm::SetVector valuesDefinedAbove; + mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove); + + while (!valuesDefinedAbove.empty()) { + for (mlir::Value val : valuesDefinedAbove) { + mlir::Operation *valOp = val.getDefiningOp(); + assert(valOp != nullptr); + if (mlir::isMemoryEffectFree(valOp)) { + mlir::Operation *clonedOp = valOp->clone(); + targetEntryBlock->push_front(clonedOp); + assert(clonedOp->getNumResults() == 1); + val.replaceUsesWithIf( + clonedOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == targetEntryBlock; + }); + } else { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfter(valOp); + auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); + + llvm::SmallVector bounds; + std::stringstream name; + builder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + builder, copyVal.getLoc(), copyVal, + /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, + /*members=*/llvm::SmallVector{}, + /*membersIndex=*/mlir::DenseIntElementsAttr{}, + static_cast< + std::underlying_type_t>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapVarsMutable().append(mapOp); + mlir::Value clonedValArg = + targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); + builder.setInsertionPointToStart(targetEntryBlock); + auto loadOp = + builder.create(clonedValArg.getLoc(), clonedValArg); + val.replaceUsesWithIf( + loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == targetEntryBlock; + }); + } + } + valuesDefinedAbove.clear(); + mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove); + } +} } // namespace internal } // namespace omp } // namespace lower @@ -720,14 +782,19 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { llvm::SmallVector boundsOps; genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps); + // Use the raw address to avoid unboxing `fir.box` values whenever possible. + // Put differently, if we have access to the direct value memory + // reference/address, we use it. + mlir::Value rawAddr = declareOp.getOriginalBase(); return Fortran::lower::omp ::internal::createMapInfoOp( - rewriter, liveIn.getLoc(), declareOp.getBase(), /*varPtrPtr=*/{}, - declareOp.getUniqName().str(), boundsOps, /*members=*/{}, + rewriter, liveIn.getLoc(), rawAddr, + /*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps, + /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{}, static_cast< std::underlying_type_t>( mapFlag), - captureKind, liveInType); + captureKind, rawAddr.getType()); } mlir::omp::TargetOp genTargetOp(mlir::Location loc, @@ -754,14 +821,24 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); mlir::Value miOperand = miOp.getVariableOperand(0); - mapper.map(miOperand, liveInDeclare.getBase()); + + // TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // need to "unpack" the box by getting the defining op of it's value. + // However, we did not hit this case in reality yet so leaving it as a + // todo for now. + + mapper.map(miOperand, liveInDeclare.getOriginalBase()); if (auto origDeclareOp = mlir::dyn_cast_if_present( miOperand.getDefiningOp())) - mapper.map(origDeclareOp.getOriginalBase(), - liveInDeclare.getOriginalBase()); + mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); } + fir::FirOpBuilder firBuilder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType())); + Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, + targetOp); rewriter.setInsertionPoint( rewriter.create(targetOp.getLoc())); diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index 7873fa4f88db6d..2f762c003ddf16 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -21,7 +21,7 @@ program do_concurrent_basic ! CHECK-NOT: fir.do_loop - ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#0 + ! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 ! CHECK: %[[C0:.*]] = arith.constant 0 : index ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index @@ -29,7 +29,7 @@ program do_concurrent_basic ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) ! CHECK-SAME: extent(%[[A_EXTENT]] : index) - ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#0 : {{[^(]+}}) + ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) ! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64 diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 new file mode 100644 index 00000000000000..5420ff4586be60 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -0,0 +1,42 @@ +! Tests `do concurrent` mapping when mapped value(s) depend on values defined +! outside the target region; e.g. the size of the array is dynamic. This needs +! to be handled by localizing these region outsiders by either cloning them in +! the region or in case we cannot do that, map them and use the mapped values. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \ +! RUN: | FileCheck %s + +subroutine foo(n) + implicit none + integer :: n + integer :: i + integer, dimension(n) :: a + + do concurrent(i=1:10) + a(i) = i + end do +end subroutine + +! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFfooEi"} +! CHECK-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFfooEa"} +! CHECK-DAG: %[[N_ALLOC:.*]] = fir.alloca i32 + +! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) +! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) +! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) + +! CHECK: omp.target +! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]], +! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]], +! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) +! CHECK-SAME: {{.*}} { + +! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] +! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] +! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]] + +! CHECK: omp.terminator +! CHECK: } + + +