diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 27631d427eb273..a2661ffb437832 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -969,7 +969,7 @@ bool ClauseProcessor::processSectionsReduction(
 }
 
 bool ClauseProcessor::processTargetReduction(
-    llvm::SmallVector<const Fortran::semantics::Symbol *> &reductionSymbols)
+    llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &reductionSymbols)
     const {
   return findRepeatableClause<omp::clause::Reduction>(
       [&](const omp::clause::Reduction &clause,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index b12a3dd891e772..050f2fef1332c0 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -127,8 +127,8 @@ class ClauseProcessor {
   bool processSectionsReduction(mlir::Location currentLocation,
                                 mlir::omp::ReductionClauseOps &result) const;
   bool processTargetReduction(
-      llvm::SmallVector<const Fortran::semantics::Symbol *> &reductionSymbols)
-      const;
+      llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
+          &reductionSymbols) const;
   bool processTo(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
   bool processUseDeviceAddr(mlir::omp::UseDeviceClauseOps &result,
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 975fff02ab4fb7..befadea2a98aba 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,11 +50,8 @@ void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
     firOpBuilder.setInsertionPointAfter(op);
     insertDeallocs();
   } else {
-    // insert dummy instruction to mark the insertion position
-    mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
-        op->getLoc(), firOpBuilder.getIndexType());
+    mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
     insertDeallocs();
-    firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
   }
 }
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7a8d8973b46e51..a80b992c0c7465 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -596,24 +596,6 @@ genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
   firOpBuilder.setInsertionPointAfter(storeOp);
 }
 
-static void genReductionVars(
-    mlir::Operation *op, Fortran::lower::AbstractConverter &converter,
-    mlir::Location &loc,
-    llvm::ArrayRef<const Fortran::semantics::Symbol *> reductionArgs,
-    llvm::ArrayRef<mlir::Type> reductionTypes) {
-  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-  llvm::SmallVector<mlir::Location> blockArgLocs(reductionArgs.size(), loc);
-
-  mlir::Block *entryBlock = firOpBuilder.createBlock(
-      &op->getRegion(0), {}, reductionTypes, blockArgLocs);
-
-  // Bind the reduction arguments to their block arguments.
-  for (auto [arg, prv] :
-       llvm::zip_equal(reductionArgs, entryBlock->getArguments())) {
-    converter.bindSymbol(*arg, prv);
-  }
-}
-
 static void
 markDeclareTarget(mlir::Operation *op,
                   Fortran::lower::AbstractConverter &converter,
@@ -754,9 +736,9 @@ struct OpWithBodyGenInfo {
     return *this;
   }
 
-  OpWithBodyGenInfo &setReductions(
-      llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *value1,
-      llvm::SmallVectorImpl<mlir::Type> *value2) {
+  OpWithBodyGenInfo &
+  setReductions(llvm::ArrayRef<const Fortran::semantics::Symbol *> *value1,
+                llvm::ArrayRef<mlir::Type> *value2) {
     reductionSymbols = value1;
     reductionTypes = value2;
     return *this;
   }
@@ -786,10 +768,10 @@ struct OpWithBodyGenInfo {
   /// [in] if provided, processes the construct's data-sharing attributes.
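  /// For combined constructs, the caller typically creates the processor up
  /// front so that data-sharing clauses are processed once for the whole
  /// construct and shared across its leaf operations.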
DataSharingProcessor *dsp = nullptr; /// [in] if provided, list of reduction symbols - llvm::SmallVectorImpl *reductionSymbols = + llvm::ArrayRef *reductionSymbols = nullptr; /// [in] if provided, list of reduction types - llvm::SmallVectorImpl *reductionTypes = nullptr; + llvm::ArrayRef *reductionTypes = nullptr; /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block /// is created in the region. GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; @@ -802,12 +784,6 @@ struct OpWithBodyGenInfo { static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - auto insertMarker = [](fir::FirOpBuilder &builder) { - mlir::Value undef = builder.create(builder.getUnknownLoc(), - builder.getIndexType()); - return undef.getDefiningOp(); - }; - // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further @@ -821,8 +797,9 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { firOpBuilder.createBlock(&op.getRegion(0)); return {}; }(); + // Mark the earliest insertion point. - mlir::Operation *marker = insertMarker(firOpBuilder); + auto marker = firOpBuilder.saveInsertionPoint(); // If it is an unstructured region and is not the outer region of a combined // construct, create empty blocks for all evaluations. @@ -837,7 +814,7 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { llvm::omp::Association::Loop; bool privatize = info.clauses && !info.outerCombined; - firOpBuilder.setInsertionPoint(marker); + firOpBuilder.restoreInsertionPoint(marker); std::optional tempDsp; if (privatize) { if (!info.dsp) { @@ -850,7 +827,7 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { if (info.dir == llvm::omp::Directive::OMPD_parallel) { threadPrivatizeVars(info.converter, info.eval); if (info.clauses) { - firOpBuilder.setInsertionPoint(marker); + firOpBuilder.restoreInsertionPoint(marker); ClauseProcessor(info.converter, info.semaCtx, *info.clauses) .processCopyin(); } @@ -864,7 +841,7 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back()); auto *temp = Fortran::lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); - firOpBuilder.setInsertionPointAfter(marker); + firOpBuilder.restoreInsertionPoint(marker); genNestedEvaluations(info.converter, info.eval); temp->erase(); } @@ -939,8 +916,8 @@ static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { } } - firOpBuilder.setInsertionPointAfter(marker); - marker->erase(); + firOpBuilder.setInsertionPoint(marker.getBlock(), + std::prev(marker.getPoint())); } static void genBodyOfTargetDataOp( @@ -1522,20 +1499,23 @@ genCriticalOp(Fortran::lower::AbstractConverter &converter, static mlir::omp::DistributeOp genDistributeOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location loc, const List &clauses, - DataSharingProcessor *dsp = nullptr) { - mlir::omp::DistributeClauseOps clauseOps; - // TODO Process clauses + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const List &clauses, + mlir::omp::DistributeClauseOps &clauseOps, + DataSharingProcessor &dsp) { + fir::FirOpBuilder &firOpBuilder = 
converter.getFirOpBuilder(); - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, loc, eval, - llvm::omp::Directive::OMPD_distribute) - .setGenNested(genNested) - .setOuterCombined(false) - .setClauses(&clauses) - .setDataSharingProcessor(dsp), - clauseOps); + // Create omp.distribute wrapper. + auto distributeOp = + firOpBuilder.create(loc, clauseOps); + + // TODO: Populate entry block arguments with reduction variables. + firOpBuilder.createBlock(&distributeOp.getRegion()); + + firOpBuilder.setInsertionPoint( + Fortran::lower::genOpenMPTerminator(firOpBuilder, distributeOp, loc)); + + return distributeOp; } static mlir::omp::FlushOp @@ -1550,6 +1530,54 @@ genFlushOp(Fortran::lower::AbstractConverter &converter, converter.getCurrentLocation(), operandRange); } +static mlir::omp::LoopNestOp +genLoopNestOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const List &clauses, + mlir::omp::LoopNestClauseOps &clauseOps, + llvm::ArrayRef iv, + llvm::ArrayRef wrapperSyms, + llvm::ArrayRef wrapperArgs, + llvm::omp::Directive directive, DataSharingProcessor &dsp) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs); + return llvm::SmallVector(iv); + }; + + auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(clauses)); + auto loopNestOp = genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval, directive) + .setClauses(&clauses) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback) + .setGenNested(true), + clauseOps); + + // Create trip_count if inside of omp.target and this is host compilation + auto offloadMod = llvm::dyn_cast( + firOpBuilder.getModule().getOperation()); + auto targetOp = loopNestOp->getParentOfType(); + + if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && + targetOp.isTargetSPMDLoop()) { + // Lower loop bounds and step, and process collapsing again, putting lowered + // values outside of omp.target this time. This enables calculating and + // accessing the trip count in the host, which is needed when lowering to + // LLVM IR via the OMPIRBuilder. 
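+    // As a sketch, the host-side value for one collapsed level follows the
+    // usual normalized-loop form (the exact sequence is emitted by
+    // calculateTripCount; %lb, %ub and %step stand for the re-lowered bound
+    // and step values):
+    //   %span = %ub - %lb + %step
+    //   %tc   = %span / %step, clamped to zero for empty loops
+    // with the per-level counts multiplied together for the collapsed nest.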
+ HostClausesInsertionGuard guard(firOpBuilder); + mlir::omp::CollapseClauseOps collapseClauseOps; + llvm::SmallVector iv; + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processCollapse(loc, eval, collapseClauseOps, iv); + targetOp.getTripCountMutable().assign( + calculateTripCount(converter, loc, collapseClauseOps)); + } + return loopNestOp; +} + static mlir::omp::MasterOp genMasterOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, @@ -1587,34 +1615,31 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, static mlir::omp::ParallelOp genParallelOp(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location loc, const List &clauses, - bool outerCombined = false) { + Fortran::lower::pft::Evaluation &eval, bool loopWrapper, + bool genNested, mlir::Location loc, const List &clauses, + mlir::omp::ParallelClauseOps &clauseOps, + mlir::omp::NumThreadsClauseOps &numThreadsClauseOps, + llvm::ArrayRef reductionSyms, + llvm::ArrayRef reductionTypes, + llvm::ArrayRef privateSyms, + mlir::omp::TargetOp parentTarget = nullptr, + bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - auto offloadModOp = - llvm::cast(*converter.getModuleOp()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); - bool mustEvalOutsideTarget = - targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + auto reductionCallback = [&](mlir::Operation *op) { + llvm::SmallVector blockArgLocs(reductionSyms.size(), loc); - mlir::omp::ParallelClauseOps clauseOps; - mlir::omp::NumThreadsClauseOps numThreadsClauseOps; - llvm::SmallVector privateSyms; - llvm::SmallVector reductionTypes; - llvm::SmallVector reductionSyms; - genParallelClauses(converter, semaCtx, stmtCtx, clauses, loc, - /*processReduction=*/!outerCombined, mustEvalOutsideTarget, - clauseOps, numThreadsClauseOps, reductionTypes, - reductionSyms); + mlir::Block *entryBlock = firOpBuilder.createBlock( + &op->getRegion(0), {}, reductionTypes, blockArgLocs); - auto reductionCallback = [&](mlir::Operation *op) { - genReductionVars(op, converter, loc, reductionSyms, reductionTypes); - return reductionSyms; + // Bind the reduction arguments to their block arguments. 
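+    // When this omp.parallel acts as a loop wrapper in a composite construct,
+    // the bindings are created later by genLoopNestOp together with all other
+    // wrapper entry block arguments, so they are skipped here.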
+ if (!loopWrapper) { + for (auto [arg, prv] : + llvm::zip_equal(reductionSyms, entryBlock->getArguments())) + converter.bindSymbol(*arg, prv); + } + return llvm::SmallVector(reductionSyms); }; OpWithBodyGenInfo genInfo = @@ -1624,13 +1649,14 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, .setOuterCombined(outerCombined) .setClauses(&clauses) .setReductions(&reductionSyms, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); + .setGenRegionEntryCb(reductionCallback) + .setDataSharingProcessor(dsp); if (!enableDelayedPrivatization) { auto parallelOp = genOpWithBody(genInfo, clauseOps); if (numThreadsClauseOps.numThreadsVar) { - if (mustEvalOutsideTarget) - targetOp.getNumThreadsMutable().assign( + if (parentTarget) + parentTarget.getNumThreadsMutable().assign( numThreadsClauseOps.numThreadsVar); else parallelOp.getNumThreadsVarMutable().assign( @@ -1639,15 +1665,6 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, return parallelOp; } - bool privatize = !outerCombined; - DataSharingProcessor dsp(converter, semaCtx, clauses, eval, - /*useDelayedPrivatization=*/true, &symTable); - - if (privatize) { - dsp.processStep1(); - dsp.processStep2(&clauseOps, &privateSyms); - } - auto genRegionEntryCB = [&](mlir::Operation *op) { auto parallelOp = llvm::cast(op); @@ -1657,7 +1674,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, mlir::OperandRange privateVars = parallelOp.getPrivateVars(); mlir::Region ®ion = parallelOp.getRegion(); - llvm::SmallVector privateVarTypes = reductionTypes; + llvm::SmallVector privateVarTypes(reductionTypes); privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); llvm::transform(privateVars, std::back_inserter(privateVarTypes), [](mlir::Value v) { return v.getType(); }); @@ -1670,23 +1687,26 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, firOpBuilder.createBlock(®ion, /*insertPt=*/{}, privateVarTypes, privateVarLocs); - llvm::SmallVector allSymbols = - reductionSyms; - allSymbols.append(privateSyms); - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, prv); + llvm::SmallVector allSymbols( + reductionSyms); + allSymbols.append(privateSyms.begin(), privateSyms.end()); + + if (!loopWrapper) { + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) + converter.bindSymbol(*arg, prv); } return allSymbols; }; // TODO Merge with the reduction CB. - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + genInfo.setGenRegionEntryCb(genRegionEntryCB); auto parallelOp = genOpWithBody(genInfo, clauseOps); if (numThreadsClauseOps.numThreadsVar) { - if (mustEvalOutsideTarget) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOps.numThreadsVar); + if (parentTarget) + parentTarget.getNumThreadsMutable().assign( + numThreadsClauseOps.numThreadsVar); else parallelOp.getNumThreadsVarMutable().assign( numThreadsClauseOps.numThreadsVar); @@ -1724,44 +1744,18 @@ static mlir::omp::SimdOp genSimdOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, mlir::Location loc, - const List &clauses) { - // TODO Remove omp.loop_nest related codegen from here. 
+ const List &clauses, mlir::omp::SimdClauseOps &clauseOps, + DataSharingProcessor &dsp) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, semaCtx, clauses, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::omp::LoopNestClauseOps loopClauseOps; - mlir::omp::SimdClauseOps simdClauseOps; - llvm::SmallVector iv; - genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopClauseOps, iv); - genSimdClauses(converter, semaCtx, clauses, loc, simdClauseOps); // Create omp.simd wrapper. - auto simdOp = firOpBuilder.create(loc, simdClauseOps); + auto simdOp = firOpBuilder.create(loc, clauseOps); - // TODO: Add reduction-related arguments to the wrapper's entry block. + // TODO: Populate entry block arguments with reduction variables. firOpBuilder.createBlock(&simdOp.getRegion()); firOpBuilder.setInsertionPoint( Fortran::lower::genOpenMPTerminator(firOpBuilder, simdOp, loc)); - // Create nested omp.loop_nest and fill body with loop contents. - auto loopOp = firOpBuilder.create(loc, loopClauseOps); - - auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(clauses)); - - auto ivCallback = [&](mlir::Operation *op) { - genLoopVars(op, converter, loc, iv); - return iv; - }; - - createBodyOfOp(*loopOp, - OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval, - llvm::omp::Directive::OMPD_simd) - .setClauses(&clauses) - .setDataSharingProcessor(&dsp) - .setGenRegionEntryCb(ivCallback)); - return simdOp; } @@ -1785,8 +1779,8 @@ static mlir::omp::TargetOp genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location loc, const List &clauses, - bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { + mlir::Location loc, const List &clauses, bool outerCombined, + DataSharingProcessor &dsp) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; @@ -1794,10 +1788,6 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, !llvm::cast(*converter.getModuleOp()) .getIsTargetDevice(); - DataSharingProcessor localDSP(converter, semaCtx, clauses, eval); - DataSharingProcessor &actualDSP = dsp ? *dsp : localDSP; - actualDSP.processStep1(); - mlir::omp::TargetClauseOps clauseOps; llvm::SmallVector mapSyms, devicePtrSyms, deviceAddrSyms, reductionSyms; @@ -1815,7 +1805,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, // attribute clauses (neither data-sharing; e.g. `private`, nor `map` // clauses). auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { - if (actualDSP.getPrivatizedSymbols().contains(&sym)) + if (dsp.getPrivatizedSymbols().contains(&sym)) return; if (llvm::find(mapSyms, &sym) == mapSyms.end()) { @@ -1903,7 +1893,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, auto targetOp = firOpBuilder.create(loc, clauseOps); genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSyms, - mapLocs, mapTypes, loc, actualDSP); + mapLocs, mapTypes, loc, dsp); return targetOp; } @@ -2073,129 +2063,387 @@ static mlir::omp::WsloopOp genWsloopOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, mlir::Location loc, - const List &clauses, DataSharingProcessor &dsp) { - // TODO Remove omp.loop_nest related codegen from here. 
+ const List &clauses, mlir::omp::WsloopClauseOps &clauseOps, + llvm::ArrayRef reductionSyms, + llvm::ArrayRef reductionTypes, + DataSharingProcessor &dsp) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - dsp.processStep1(); - dsp.processStep2(); - Fortran::lower::StatementContext stmtCtx; - mlir::omp::LoopNestClauseOps loopClauseOps; - mlir::omp::WsloopClauseOps wsClauseOps; + // Create omp.wsloop wrapper. + llvm::SmallVector reductionLocs(reductionSyms.size(), loc); + auto wsloopOp = firOpBuilder.create(loc, clauseOps); + + // Populate entry block arguments with reduction variables. + firOpBuilder.createBlock(&wsloopOp.getRegion(), {}, reductionTypes, + reductionLocs); + + firOpBuilder.setInsertionPoint( + Fortran::lower::genOpenMPTerminator(firOpBuilder, wsloopOp, loc)); + + return wsloopOp; +} + +//===----------------------------------------------------------------------===// +// Code generation functions for the standalone version of constructs that can +// be a leaf in a composite construct +//===----------------------------------------------------------------------===// + +static void +genStandaloneDistribute(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const List &clauses, mlir::Location loc, + DataSharingProcessor &dsp) { + mlir::omp::DistributeClauseOps distributeClauseOps; + // TODO: Process DISTRIBUTE clauses + + mlir::omp::LoopNestClauseOps loopNestClauseOps; llvm::SmallVector iv; - llvm::SmallVector reductionTypes; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); + + auto distributeOp = genDistributeOp(converter, semaCtx, eval, loc, clauses, + distributeClauseOps, dsp); + + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + /*wrapperSyms=*/{}, distributeOp.getRegion().getArguments(), + llvm::omp::Directive::OMPD_distribute, dsp); +} + +static void genStandaloneDo(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const List &clauses, mlir::Location loc, + DataSharingProcessor &dsp) { + Fortran::lower::StatementContext stmtCtx; + + mlir::omp::WsloopClauseOps wsloopClauseOps; llvm::SmallVector reductionSyms; - genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopClauseOps, iv); - genWsloopClauses(converter, semaCtx, stmtCtx, clauses, loc, wsClauseOps, + llvm::SmallVector reductionTypes; + genWsloopClauses(converter, semaCtx, stmtCtx, clauses, loc, wsloopClauseOps, reductionTypes, reductionSyms); - // Create omp.wsloop wrapper and populate entry block arguments with reduction - // variables. - auto wsloopOp = firOpBuilder.create(loc, wsClauseOps); - llvm::SmallVector reductionLocs(reductionSyms.size(), loc); - mlir::Block *wsloopEntryBlock = firOpBuilder.createBlock( - &wsloopOp.getRegion(), {}, reductionTypes, reductionLocs); - firOpBuilder.setInsertionPoint( - Fortran::lower::genOpenMPTerminator(firOpBuilder, wsloopOp, loc)); + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); - // Create nested omp.loop_nest and fill body with loop contents. 
- auto loopOp = firOpBuilder.create(loc, loopClauseOps); + auto wsloopOp = + genWsloopOp(converter, semaCtx, eval, loc, clauses, wsloopClauseOps, + reductionSyms, reductionTypes, dsp); - auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(clauses)); + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + reductionSyms, wsloopOp.getRegion().getArguments(), + llvm::omp::Directive::OMPD_do, dsp); +} - auto ivCallback = [&](mlir::Operation *op) { - genLoopVars(op, converter, loc, iv, reductionSyms, - wsloopEntryBlock->getArguments()); - return iv; - }; +static void genStandaloneParallel( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, const List &clauses, + const mlir::omp::PrivateClauseOps &privateClauseOps, + llvm::ArrayRef privateSyms, + mlir::Location loc, bool genNested, bool outerCombined, + DataSharingProcessor *dsp = nullptr) { + Fortran::lower::StatementContext stmtCtx; - createBodyOfOp(*loopOp, - OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval, - llvm::omp::Directive::OMPD_do) - .setClauses(&clauses) - .setDataSharingProcessor(&dsp) - .setReductions(&reductionSyms, &reductionTypes) - .setGenRegionEntryCb(ivCallback)); + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = + findParentTargetOp(converter.getFirOpBuilder()); + bool evalOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); - // Create trip_count if inside of omp.target and this is host compilation - auto offloadMod = llvm::dyn_cast( - firOpBuilder.getModule().getOperation()); - auto targetOp = wsloopOp->getParentOfType(); + mlir::omp::ParallelClauseOps parallelClauseOps; + mlir::omp::NumThreadsClauseOps numThreadsClauseOps; + llvm::SmallVector reductionSyms; + llvm::SmallVector reductionTypes; + genParallelClauses(converter, semaCtx, stmtCtx, clauses, loc, + /*processReduction=*/!outerCombined, evalOutsideTarget, + parallelClauseOps, numThreadsClauseOps, reductionTypes, + reductionSyms); + parallelClauseOps.privateVars = privateClauseOps.privateVars; + parallelClauseOps.privatizers = privateClauseOps.privatizers; - if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && - targetOp.isTargetSPMDLoop()) { - // Lower loop bounds and step, and process collapsing again, putting lowered - // values outside of omp.target this time. This enables calculating and - // accessing the trip count in the host, which is needed when lowering to - // LLVM IR via the OMPIRBuilder. - HostClausesInsertionGuard guard(firOpBuilder); - mlir::omp::CollapseClauseOps collapseClauseOps; - llvm::SmallVector iv; - ClauseProcessor cp(converter, semaCtx, clauses); - cp.processCollapse(loc, eval, collapseClauseOps, iv); - targetOp.getTripCountMutable().assign( - calculateTripCount(converter, loc, collapseClauseOps)); - } + genParallelOp(converter, semaCtx, eval, /*loopWrapper=*/false, genNested, loc, + clauses, parallelClauseOps, numThreadsClauseOps, reductionSyms, + reductionTypes, privateSyms, + evalOutsideTarget ? 
targetOp : nullptr, outerCombined, dsp); +} - return wsloopOp; +static void genStandaloneSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const List &clauses, mlir::Location loc, + DataSharingProcessor &dsp) { + mlir::omp::SimdClauseOps simdClauseOps; + genSimdClauses(converter, semaCtx, clauses, loc, simdClauseOps); + + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); + + auto simdOp = + genSimdOp(converter, semaCtx, eval, loc, clauses, simdClauseOps, dsp); + + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + /*wrapperSyms=*/{}, simdOp.getRegion().getArguments(), + llvm::omp::Directive::OMPD_simd, dsp); } //===----------------------------------------------------------------------===// // Code generation functions for composite constructs //===----------------------------------------------------------------------===// -static void -genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const List &clauses, - mlir::Location loc) { - // TODO Add temporary solution - TODO(loc, "Composite DISTRIBUTE PARALLEL DO"); +static void genCompositeDistributeParallelDo( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, const List &clauses, + const mlir::omp::PrivateClauseOps &privateClauseOps, + llvm::ArrayRef privateSyms, + mlir::Location loc, DataSharingProcessor &dsp) { + Fortran::lower::StatementContext stmtCtx; + + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = + findParentTargetOp(converter.getFirOpBuilder()); + bool evalOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + + // Clause processing. + mlir::omp::DistributeClauseOps distributeClauseOps; + // TODO: Process DISTRIBUTE clauses + + mlir::omp::ParallelClauseOps parallelClauseOps; + mlir::omp::NumThreadsClauseOps numThreadsClauseOps; + llvm::SmallVector parallelReductionSyms; + llvm::SmallVector parallelReductionTypes; + genParallelClauses(converter, semaCtx, stmtCtx, clauses, loc, + /*processReduction=*/true, + /*evalOutsideTarget=*/evalOutsideTarget, parallelClauseOps, + numThreadsClauseOps, parallelReductionTypes, + parallelReductionSyms); + parallelClauseOps.privateVars = privateClauseOps.privateVars; + parallelClauseOps.privatizers = privateClauseOps.privatizers; + + mlir::omp::WsloopClauseOps wsloopClauseOps; + llvm::SmallVector wsloopReductionSyms; + llvm::SmallVector wsloopReductionTypes; + genWsloopClauses(converter, semaCtx, stmtCtx, clauses, loc, wsloopClauseOps, + wsloopReductionTypes, wsloopReductionSyms); + + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); + + // Operation creation. + auto distributeOp = genDistributeOp(converter, semaCtx, eval, loc, clauses, + distributeClauseOps, dsp); + + auto parallelOp = genParallelOp( + converter, semaCtx, eval, /*loopWrapper=*/true, /*genNested=*/false, loc, + clauses, parallelClauseOps, numThreadsClauseOps, parallelReductionSyms, + parallelReductionTypes, privateSyms, + evalOutsideTarget ? 
targetOp : nullptr, + /*outerCombined=*/false, &dsp); + + auto wsloopOp = + genWsloopOp(converter, semaCtx, eval, loc, clauses, wsloopClauseOps, + wsloopReductionSyms, wsloopReductionTypes, dsp); + + // Construct wrapper entry block list and associated symbols. It is important + // that the symbol order and the block argument order match, so that the + // symbol-value bindings created are correct. + auto wrapperSyms = + llvm::to_vector(llvm::concat( + parallelReductionSyms, privateSyms, wsloopReductionSyms)); + + auto wrapperArgs = llvm::to_vector( + llvm::concat(distributeOp.getRegion().getArguments(), + parallelOp.getRegion().getArguments(), + wsloopOp.getRegion().getArguments())); + + assert(wrapperSyms.size() == wrapperArgs.size() && + "Number of symbols and wrapper block arguments must match"); + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + wrapperSyms, wrapperArgs, + llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); } static void genCompositeDistributeParallelDoSimd( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const List &clauses, - mlir::Location loc) { - TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD"); + const mlir::omp::PrivateClauseOps &privateClauseOps, + llvm::ArrayRef privateSyms, + mlir::Location loc, DataSharingProcessor &dsp) { + Fortran::lower::StatementContext stmtCtx; + + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = + findParentTargetOp(converter.getFirOpBuilder()); + bool evalOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + + // Clause processing. + mlir::omp::DistributeClauseOps distributeClauseOps; + // TODO: Process DISTRIBUTE clauses + + mlir::omp::ParallelClauseOps parallelClauseOps; + mlir::omp::NumThreadsClauseOps numThreadsClauseOps; + llvm::SmallVector parallelReductionSyms; + llvm::SmallVector parallelReductionTypes; + genParallelClauses(converter, semaCtx, stmtCtx, clauses, loc, + /*processReduction=*/true, + /*evalOutsideTarget=*/evalOutsideTarget, parallelClauseOps, + numThreadsClauseOps, parallelReductionTypes, + parallelReductionSyms); + parallelClauseOps.privateVars = privateClauseOps.privateVars; + parallelClauseOps.privatizers = privateClauseOps.privatizers; + + mlir::omp::WsloopClauseOps wsloopClauseOps; + llvm::SmallVector wsloopReductionSyms; + llvm::SmallVector wsloopReductionTypes; + genWsloopClauses(converter, semaCtx, stmtCtx, clauses, loc, wsloopClauseOps, + wsloopReductionTypes, wsloopReductionSyms); + + mlir::omp::SimdClauseOps simdClauseOps; + genSimdClauses(converter, semaCtx, clauses, loc, simdClauseOps); + + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); + + // Operation creation. + auto distributeOp = genDistributeOp(converter, semaCtx, eval, loc, clauses, + distributeClauseOps, dsp); + + auto parallelOp = genParallelOp( + converter, semaCtx, eval, /*loopWrapper=*/true, /*genNested=*/false, loc, + clauses, parallelClauseOps, numThreadsClauseOps, parallelReductionSyms, + parallelReductionTypes, privateSyms, + evalOutsideTarget ? 
targetOp : nullptr, + /*outerCombined=*/false, &dsp); + + auto wsloopOp = + genWsloopOp(converter, semaCtx, eval, loc, clauses, wsloopClauseOps, + wsloopReductionSyms, wsloopReductionTypes, dsp); + + auto simdOp = + genSimdOp(converter, semaCtx, eval, loc, clauses, simdClauseOps, dsp); + + // Construct wrapper entry block list and associated symbols. It is important + // that the symbol order and the block argument order match, so that the + // symbol-value bindings created are correct. + auto wrapperSyms = + llvm::to_vector(llvm::concat( + parallelReductionSyms, privateSyms, wsloopReductionSyms)); + + auto wrapperArgs = llvm::to_vector(llvm::concat( + distributeOp.getRegion().getArguments(), + parallelOp.getRegion().getArguments(), + wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); + + assert(wrapperSyms.size() == wrapperArgs.size() && + "Number of symbols and wrapper block arguments must match"); + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + wrapperSyms, wrapperArgs, + llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp); } static void genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const List &clauses, mlir::Location loc) { - TODO(loc, "Composite DISTRIBUTE SIMD"); + const List &clauses, mlir::Location loc, + DataSharingProcessor &dsp) { + // Clause processing. + mlir::omp::DistributeClauseOps distributeClauseOps; + // TODO: Process DISTRIBUTE clauses + + mlir::omp::SimdClauseOps simdClauseOps; + genSimdClauses(converter, semaCtx, clauses, loc, simdClauseOps); + + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); + + // Operation creation. + auto distributeOp = genDistributeOp(converter, semaCtx, eval, loc, clauses, + distributeClauseOps, dsp); + + auto simdOp = + genSimdOp(converter, semaCtx, eval, loc, clauses, simdClauseOps, dsp); + + // Construct wrapper entry block list and associated symbols. It is important + // that the symbol order and the block argument order match, so that the + // symbol-value bindings created are correct. + auto wrapperArgs = llvm::to_vector( + llvm::concat(distributeOp.getRegion().getArguments(), + simdOp.getRegion().getArguments())); + + assert(wrapperArgs.empty() && + "Block args for omp.simd and omp.distribute currently not expected"); + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + /*wrapperSyms=*/{}, wrapperArgs, + llvm::omp::Directive::OMPD_distribute_simd, dsp); } static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const List &clauses, - mlir::Location loc) { - ClauseProcessor cp(converter, semaCtx, clauses); - cp.processTODO( - loc, llvm::omp::OMPD_do_simd); - // TODO: Add support for vectorization - add vectorization hints inside loop - // body. - // OpenMP standard does not specify the length of vector instructions. - // Currently we safely assume that for !$omp do simd pragma the SIMD length - // is equal to 1 (i.e. we generate standard workshare loop). - // When support for vectorization is enabled, then we need to add handling of - // if clause. Currently if clause can be skipped because we always assume - // SIMD length = 1. 
- DataSharingProcessor dsp(converter, semaCtx, clauses, eval); - genWsloopOp(converter, semaCtx, eval, loc, clauses, dsp); + const List &clauses, mlir::Location loc, + DataSharingProcessor &dsp) { + Fortran::lower::StatementContext stmtCtx; + + // Clause processing. + mlir::omp::WsloopClauseOps wsloopClauseOps; + llvm::SmallVector wsloopReductionSyms; + llvm::SmallVector wsloopReductionTypes; + genWsloopClauses(converter, semaCtx, stmtCtx, clauses, loc, wsloopClauseOps, + wsloopReductionTypes, wsloopReductionSyms); + + mlir::omp::SimdClauseOps simdClauseOps; + genSimdClauses(converter, semaCtx, clauses, loc, simdClauseOps); + + mlir::omp::LoopNestClauseOps loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopNestClauseOps, + iv); + + // Operation creation. + auto wsloopOp = + genWsloopOp(converter, semaCtx, eval, loc, clauses, wsloopClauseOps, + wsloopReductionSyms, wsloopReductionTypes, dsp); + + auto simdOp = + genSimdOp(converter, semaCtx, eval, loc, clauses, simdClauseOps, dsp); + + // Construct wrapper entry block list and associated symbols. It is important + // that the symbol order and the block argument order match, so that the + // symbol-value bindings created are correct. + auto wrapperArgs = llvm::to_vector(llvm::concat( + wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); + + assert(wsloopReductionSyms.size() == wrapperArgs.size() && + "Number of symbols and wrapper block arguments must match"); + genLoopNestOp(converter, semaCtx, eval, loc, clauses, loopNestClauseOps, iv, + wsloopReductionSyms, wrapperArgs, + llvm::omp::Directive::OMPD_do_simd, dsp); } static void genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const List &clauses, mlir::Location loc) { + const List &clauses, mlir::Location loc, + DataSharingProcessor &dsp) { TODO(loc, "Composite TASKLOOP SIMD"); } @@ -2522,21 +2770,36 @@ genOMP(Fortran::lower::AbstractConverter &converter, genOrderedRegionOp(converter, semaCtx, eval, genNested, currentLocation, clauses); break; - case llvm::omp::Directive::OMPD_parallel: + case llvm::omp::Directive::OMPD_parallel: { // 2.6 PARALLEL construct. - genParallelOp(converter, symTable, semaCtx, eval, genNested, - currentLocation, clauses, outerCombined); + mlir::omp::PrivateClauseOps privateClauseOps; + llvm::SmallVector privateSyms; + DataSharingProcessor dsp(converter, semaCtx, clauses, eval, + enableDelayedPrivatization, &symTable); + if (enableDelayedPrivatization && !outerCombined) { + dsp.processStep1(); + dsp.processStep2(&privateClauseOps, &privateSyms); + } + genStandaloneParallel(converter, semaCtx, eval, clauses, privateClauseOps, + privateSyms, currentLocation, genNested, + outerCombined, + enableDelayedPrivatization ? &dsp : nullptr); break; + } case llvm::omp::Directive::OMPD_single: // 2.8.2 SINGLE construct. genSingleOp(converter, semaCtx, eval, genNested, currentLocation, clauses); break; - case llvm::omp::Directive::OMPD_target: + case llvm::omp::Directive::OMPD_target: { // 2.12.5 TARGET construct. + DataSharingProcessor dsp(converter, semaCtx, clauses, eval, + enableDelayedPrivatization, &symTable); + dsp.processStep1(); genTargetOp(converter, semaCtx, eval, genNested, currentLocation, clauses, - outerCombined); + outerCombined, dsp); break; + } case llvm::omp::Directive::OMPD_target_data: // 2.12.2 TARGET DATA construct. 
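    // Unlike TARGET above, no DataSharingProcessor is set up here: TARGET
    // DATA does not create a data environment, so there is nothing to
    // privatize.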
genTargetDataOp(converter, semaCtx, eval, genNested, currentLocation, @@ -2623,70 +2886,100 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, semaCtx)); } - DataSharingProcessor dsp(converter, semaCtx, clauses, eval); + mlir::omp::PrivateClauseOps privateClauseOps; + llvm::SmallVector privateSyms; + DataSharingProcessor dsp(converter, semaCtx, clauses, eval, + enableDelayedPrivatization, &symTable); + dsp.processStep1(); + std::optional nextDir = origDirective; while (nextDir) { llvm::omp::Directive leafDir; std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir); if (llvm::omp::compositeConstructSet.test(leafDir)) { assert(!nextDir && "Composite construct cannot be split"); + if (enableDelayedPrivatization) + dsp.processStep2(&privateClauseOps, &privateSyms); + else + dsp.processStep2(); + switch (leafDir) { case llvm::omp::Directive::OMPD_distribute_parallel_do: // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct. genCompositeDistributeParallelDo(converter, semaCtx, eval, clauses, - currentLocation); + privateClauseOps, privateSyms, + currentLocation, dsp); break; case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct. genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, clauses, - currentLocation); + privateClauseOps, privateSyms, + currentLocation, dsp); break; case llvm::omp::Directive::OMPD_distribute_simd: // 2.9.4.2 DISTRIBUTE SIMD construct. genCompositeDistributeSimd(converter, semaCtx, eval, clauses, - currentLocation); + currentLocation, dsp); break; case llvm::omp::Directive::OMPD_do_simd: // 2.9.3.2 Worksharing-Loop SIMD construct. - genCompositeDoSimd(converter, semaCtx, eval, clauses, currentLocation); + genCompositeDoSimd(converter, semaCtx, eval, clauses, currentLocation, + dsp); break; case llvm::omp::Directive::OMPD_taskloop_simd: // 2.10.3 TASKLOOP SIMD construct. genCompositeTaskloopSimd(converter, semaCtx, eval, clauses, - currentLocation); + currentLocation, dsp); break; default: llvm_unreachable("Unexpected composite construct"); } } else { const bool genNested = !nextDir; + bool isLoopLeaf = llvm::omp::getDirectiveAssociation(leafDir) == + llvm::omp::Association::Loop; + assert((!isLoopLeaf || !nextDir.has_value()) && + "Loop leaf expected to be last in combined construct"); + assert((isLoopLeaf || nextDir.has_value()) && + "Block leaf not expected to be last in combined construct"); + + if (enableDelayedPrivatization && + leafDir != llvm::omp::Directive::OMPD_target) + dsp.processStep2(&privateClauseOps, &privateSyms); + else if (isLoopLeaf) + dsp.processStep2(); + switch (leafDir) { case llvm::omp::Directive::OMPD_distribute: // 2.9.4.1 DISTRIBUTE construct. - genDistributeOp(converter, semaCtx, eval, genNested, currentLocation, - clauses, &dsp); + genStandaloneDistribute(converter, semaCtx, eval, clauses, + currentLocation, dsp); break; case llvm::omp::Directive::OMPD_do: // 2.9.2 Worksharing-Loop construct. - genWsloopOp(converter, semaCtx, eval, currentLocation, clauses, dsp); + genStandaloneDo(converter, semaCtx, eval, clauses, currentLocation, + dsp); break; case llvm::omp::Directive::OMPD_parallel: // 2.6 PARALLEL construct. // FIXME This is not necessarily always the outer leaf construct of a - // combined construct in this constext (e.g. distribute parallel do). + // combined construct in this context (e.g. target parallel do). // Maybe rename the argument if it represents something else or // initialize it properly. 
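      // Note that outerCombined=true also makes genStandaloneParallel pass
      // processReduction=false to genParallelClauses, attaching reductions to
      // the inner worksharing-loop leaf instead of the parallel region.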
- genParallelOp(converter, symTable, semaCtx, eval, genNested, - currentLocation, clauses, /*outerCombined=*/true); + genStandaloneParallel(converter, semaCtx, eval, clauses, + privateClauseOps, privateSyms, currentLocation, + genNested, /*outerCombined=*/true, + enableDelayedPrivatization ? &dsp : nullptr); break; case llvm::omp::Directive::OMPD_simd: // 2.9.3.1 SIMD construct. - genSimdOp(converter, semaCtx, eval, currentLocation, clauses); + genStandaloneSimd(converter, semaCtx, eval, clauses, currentLocation, + dsp); break; case llvm::omp::Directive::OMPD_target: // 2.12.5 TARGET construct. genTargetOp(converter, semaCtx, eval, genNested, currentLocation, - clauses, /*outerCombined=*/true, &dsp); + clauses, /*outerCombined=*/true, dsp); break; case llvm::omp::Directive::OMPD_taskloop: // 2.10.2 TASKLOOP construct. @@ -2695,7 +2988,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_teams: // 2.7 TEAMS construct. // FIXME This is not necessarily always the outer leaf construct of a - // combined construct in this constext (e.g. target teams distribute). + // combined construct in this context (e.g. target teams distribute). // Maybe rename the argument if it represents something else or // initialize it properly. genTeamsOp(converter, semaCtx, eval, genNested, currentLocation, @@ -2755,9 +3048,15 @@ genOMP(Fortran::lower::AbstractConverter &converter, std::get(beginSectionsDirective.t) .v; if (dir == llvm::omp::Directive::OMPD_parallel_sections) { - genParallelOp(converter, symTable, semaCtx, eval, - /*genNested=*/false, currentLocation, clauses, - /*outerCombined=*/true); + mlir::omp::PrivateClauseOps privateClauseOps; + llvm::SmallVector privateSyms; + DataSharingProcessor dsp(converter, semaCtx, clauses, eval, + enableDelayedPrivatization, &symTable); + // No calls to dsp.step1,2() because outerCombined is always true here. + genStandaloneParallel(converter, semaCtx, eval, clauses, privateClauseOps, + privateSyms, currentLocation, /*genNested=*/false, + /*outerCombined=*/true, + enableDelayedPrivatization ? &dsp : nullptr); } // SECTIONS construct. diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 6c9cb4fade7ced..25bcb45b157e6b 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -72,7 +72,7 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( void ReductionProcessor::addReductionSym( const omp::clause::Reduction &reduction, - llvm::SmallVector &symbols) { + llvm::SmallVectorImpl &symbols) { const auto &objectList{std::get(reduction.t)}; llvm::transform(objectList, std::back_inserter(symbols), [](const Object &object) { return object.id(); }); diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h index 2b49f02835ab0d..17c8ae7b69214a 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h @@ -112,7 +112,7 @@ class ReductionProcessor { static void addReductionSym( const omp::clause::Reduction &reduction, - llvm::SmallVector &symbols); + llvm::SmallVectorImpl &symbols); /// Creates an OpenMP reduction declaration and inserts it into the provided /// symbol table. 
The declaration has a constant initializer with the neutral diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 0258af0182128a..f322580cdab8fc 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -69,8 +69,6 @@ int64_t getCollapseValue(const List &clauses); Fortran::semantics::Symbol * getOmpObjectSymbol(const Fortran::parser::OmpObject &ompObject); -mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder); - void genObjectList(const ObjectList &objects, Fortran::lower::AbstractConverter &converter, llvm::SmallVectorImpl &operands); diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index a0fbae5b614cc7..6bb43439fe74df 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -248,14 +248,45 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { if (auto ompOutlineableIface = getRegion() .getParentOfType()) { - return ompOutlineableIface.getAllocaBlock(); + // omp.parallel can work as a block construct but it can also be a loop + // wrapper when part of a composite construct. Make sure it's only treated + // as a block if it's not a wrapper. + auto parallelOp = + llvm::dyn_cast(*ompOutlineableIface); + if (!parallelOp || !llvm::isa_and_present( + parallelOp->getParentOp())) + return ompOutlineableIface.getAllocaBlock(); } + + // All allocations associated with an OpenMP loop wrapper must happen outside + // of all wrappers. + mlir::Operation *currentOp = getRegion().getParentOp(); + auto wrapperIface = + llvm::isa(currentOp) + ? llvm::cast( + currentOp->getParentOp()) + : llvm::dyn_cast(currentOp); + if (wrapperIface) { + // Cannot use LoopWrapperInterface methods here because the whole nest may + // not have been created at this point. Manually traverse parents instead. + mlir::omp::LoopWrapperInterface lastWrapperOp = wrapperIface; + while (true) { + if (auto nextWrapper = + llvm::dyn_cast_if_present( + lastWrapperOp->getParentOp())) + lastWrapperOp = nextWrapper; + else + break; + } + return &lastWrapperOp->getParentRegion()->front(); + } + if (getRegion().getParentOfType()) return &getRegion().front(); + if (auto accRecipeIface = - getRegion().getParentOfType()) { + getRegion().getParentOfType()) return accRecipeIface.getAllocaBlock(getRegion()); - } return getEntryBlock(); } @@ -266,9 +297,15 @@ mlir::Value fir::FirOpBuilder::createTemporaryAlloc( llvm::ArrayRef attrs) { assert(!type.isa() && "cannot be a reference"); // If the alloca is inside an OpenMP Op which will be outlined then pin - // the alloca here. - const bool pinned = + // the alloca here. Make sure that an omp.parallel operation that is taking + // a loop wrapper role is not detected as outlineable here. + auto iface = getRegion().getParentOfType(); + auto parallelOp = + iface ? llvm::dyn_cast(*iface) : nullptr; + const bool pinned = + iface && (!parallelOp || !llvm::isa_and_present( + parallelOp->getParentOp())); mlir::Value temp = create(loc, type, /*unique_name=*/llvm::StringRef{}, name, pinned, lenParams, shape, attrs); diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index 26871d88881555..4ef81faa140b3d 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -239,9 +239,16 @@ mlir::Value ConvertFIRToLLVMPattern::genBoxAttributeCheck( // 3. 
The first ancestor that is an OpenMP Op or a LLVMFuncOp mlir::Block * ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const { - if (auto iface = mlir::dyn_cast(op)) - return iface.getAllocaBlock(); - if (auto llvmFuncOp = mlir::dyn_cast(op)) + if (auto iface = + mlir::dyn_cast(op)) { + // omp.parallel can work as a block construct but it can also be a loop + // wrapper when it's part of a composite construct. Make sure it's only + // treated as a block if it's not a wrapper. + auto parallelOp = llvm::dyn_cast(*iface); + if (!parallelOp || !llvm::isa_and_present( + parallelOp->getParentOp())) + return iface.getAllocaBlock(); + } else if (auto llvmFuncOp = mlir::dyn_cast(op)) return &llvmFuncOp.front(); return getBlockForAllocaInsert(op->getParentOp()); } diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index c187afab44ef20..44e9177a316d85 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -99,15 +99,19 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { "constant LB, UB, and step values."); } - llvm::SmallVector lowerBound, upperBound, step; - lowerBound.push_back(rewriter.clone(*lbOp)->getResult(0)); - upperBound.push_back(rewriter.clone(*ubOp)->getResult(0)); - step.push_back(rewriter.clone(*stepOp)->getResult(0)); + mlir::omp::LoopNestClauseOps clauseOps; + clauseOps.loopLBVar.push_back(rewriter.clone(*lbOp)->getResult(0)); + clauseOps.loopUBVar.push_back(rewriter.clone(*ubOp)->getResult(0)); + clauseOps.loopStepVar.push_back(rewriter.clone(*stepOp)->getResult(0)); + clauseOps.loopInclusiveAttr = rewriter.getUnitAttr(); // ==== TODO (1) End ==== + auto wsloopOp = rewriter.create(doLoop.getLoc()); + rewriter.createBlock(&wsloopOp.getRegion()); + rewriter.setInsertionPoint( + rewriter.create(wsloopOp.getLoc())); - auto wsLoopOp = rewriter.create( - doLoop.getLoc(), lowerBound, upperBound, step); - wsLoopOp.setInclusive(true); + auto loopNestOp = + rewriter.create(doLoop.getLoc(), clauseOps); auto outlineableOp = mlir::dyn_cast(*parallelOp); @@ -180,11 +184,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // Clone the loop's body inside the worksharing construct using the mapped // memref values. - rewriter.cloneRegionBefore(doLoop.getRegion(), wsLoopOp.getRegion(), - wsLoopOp.getRegion().begin(), mapper); + rewriter.cloneRegionBefore(doLoop.getRegion(), loopNestOp.getRegion(), + loopNestOp.getRegion().begin(), mapper); - mlir::Operation *terminator = wsLoopOp.getRegion().back().getTerminator(); - rewriter.setInsertionPointToEnd(&wsLoopOp.getRegion().back()); + mlir::Operation *terminator = loopNestOp.getRegion().back().getTerminator(); + rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back()); rewriter.create(terminator->getLoc()); rewriter.eraseOp(terminator); diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp index 1c213abefe6f5f..34949ab4460e77 100644 --- a/flang/lib/Optimizer/Transforms/StackArrays.cpp +++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp @@ -571,8 +571,31 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { return {point}; }; - auto oldOmpRegion = - oldAlloc->getParentOfType(); + // Find the first OpenMP outlineable parent region while taking into account + // the possibility of finding an omp.parallel region that is taking a loop + // wrapper role. 
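+  // (For example, the omp.parallel created directly inside omp.distribute
+  // when lowering a DISTRIBUTE PARALLEL DO composite construct.)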
These operations must be skipped, as they cannot hold + // allocations. + const auto findOmpRegion = [](mlir::Operation *op) { + auto findOmpRegionImpl = + [](mlir::Operation *op, + auto &findOmpRegion) -> mlir::omp::OutlineableOpenMPOpInterface { + auto ompRegion = + op->getParentOfType(); + if (!ompRegion) + return nullptr; + + if (auto parallelOp = + mlir::dyn_cast_if_present(*ompRegion)) { + mlir::Operation *parentOp = parallelOp->getParentOp(); + if (mlir::isa_and_present(parentOp)) + return findOmpRegion(parentOp, findOmpRegion); + } + return ompRegion; + }; + return findOmpRegionImpl(op, findOmpRegionImpl); + }; + + auto oldOmpRegion = findOmpRegion(oldAlloc); // Find when the last operand value becomes available mlir::Block *operandsBlock = nullptr; @@ -600,8 +623,7 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { LLVM_DEBUG(llvm::dbgs() << "--Placing after last operand: " << *lastOperand << "\n"); // check we aren't moving out of an omp region - auto lastOpOmpRegion = - lastOperand->getParentOfType(); + auto lastOpOmpRegion = findOmpRegion(lastOperand); if (lastOpOmpRegion == oldOmpRegion) return checkReturn(lastOperand); // Presumably this happened because the operands became ready before the diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 8cf4f566964f91..e8fe388a71a930 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -199,6 +199,7 @@ func.func @_QPsimd1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref fir.store %3 to %6 : !fir.ref omp.yield } + omp.terminator } omp.terminator } @@ -223,6 +224,7 @@ func.func @_QPsimd1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref // CHECK: llvm.store %[[I1]], %[[ARR_I_REF]] : i32, !llvm.ptr // CHECK: omp.yield // CHECK: } +// CHECK: omp.terminator // CHECK: } // CHECK: omp.terminator // CHECK: } @@ -516,6 +518,7 @@ func.func @_QPsimd_with_nested_loop() { fir.store %7 to %3 : !fir.ref omp.yield } + omp.terminator } return } @@ -536,6 +539,7 @@ func.func @_QPsimd_with_nested_loop() { // CHECK: ^bb3: // CHECK: omp.yield // CHECK: } +// CHECK: omp.terminator // CHECK: } // CHECK: llvm.return // CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/if-clause.f90 b/flang/test/Lower/OpenMP/FIR/if-clause.f90 index b079ca5ddd2c62..a84a09723e336f 100644 --- a/flang/test/Lower/OpenMP/FIR/if-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/if-clause.f90 @@ -9,7 +9,6 @@ program main ! TODO When they are supported, add tests for: ! - PARALLEL SECTIONS ! - PARALLEL WORKSHARE - ! - TARGET UPDATE ! - TASKLOOP ! - TASKLOOP SIMD @@ -20,12 +19,16 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do simd do i = 1, 10 end do @@ -33,42 +36,67 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do simd if(.true.) do i = 1, 10 end do !$omp end distribute parallel do simd - + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! 
CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do !$omp end distribute parallel do simd - + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do simd if(parallel: .true.) do i = 1, 10 end do !$omp end distribute parallel do simd - + ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do simd if(simd: .true.) do i = 1, 10 end do @@ -83,9 +111,13 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do do i = 1, 10 end do @@ -93,8 +125,13 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do if(.true.) do i = 1, 10 end do @@ -102,8 +139,13 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp distribute parallel do if(parallel: .true.) do i = 1, 10 end do @@ -118,7 +160,8 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) - ! CHECK: omp.simdloop + ! CHECK-SAME: { + ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { !$omp distribute simd @@ -128,8 +171,10 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) - ! CHECK: omp.simdloop + ! CHECK-SAME: { + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp distribute simd if(.true.) do i = 1, 10 end do @@ -137,8 +182,10 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) - ! CHECK: omp.simdloop + ! CHECK-SAME: { + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp distribute simd if(simd: .true.) do i = 1, 10 end do @@ -152,18 +199,31 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp do simd do i = 1, 10 end do !$omp end do simd ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp do simd if(.true.) do i = 1, 10 end do !$omp end do simd ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp do simd if(simd: .true.) do i = 1, 10 end do @@ -181,12 +241,14 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel if(.true.) i = 10 !$omp end parallel ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel if(parallel: .true.) i = 10 !$omp end parallel @@ -197,6 +259,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! 
CHECK-SAME: { !$omp parallel do do i = 1, 10 end do @@ -204,6 +269,10 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do if(.true.) do i = 1, 10 end do @@ -211,6 +280,10 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do if(parallel: .true.) do i = 1, 10 end do @@ -225,6 +298,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd do i = 1, 10 end do @@ -232,7 +308,13 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(.true.) do i = 1, 10 end do @@ -240,7 +322,13 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -251,6 +339,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(parallel: .true.) do i = 1, 10 end do @@ -260,6 +351,11 @@ program main ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(simd: .true.) do i = 1, 10 end do @@ -278,6 +374,7 @@ program main ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp simd if(.true.) do i = 1, 10 end do @@ -285,6 +382,7 @@ program main ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp simd if(simd: .true.) do i = 1, 10 end do @@ -301,11 +399,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target if(.true.) !$omp end target ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target if(target: .true.) !$omp end target @@ -320,11 +420,13 @@ program main ! CHECK: omp.target_data ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target data map(tofrom: i) if(.true.) !$omp end target data ! CHECK: omp.target_data ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target data map(tofrom: i) if(target data: .true.) !$omp end target data @@ -333,7 +435,6 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target_enter_data ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: map !$omp target enter data map(to: i) ! CHECK: omp.target_enter_data @@ -349,7 +450,6 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target_exit_data ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: map !$omp target exit data map(from: i) ! CHECK: omp.target_exit_data @@ -369,6 +469,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do do i = 1, 10 end do @@ -376,8 +479,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! 
CHECK-SAME: { !$omp target parallel do if(.true.) do i = 1, 10 end do @@ -385,8 +493,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(target: .true.) if(parallel: .false.) do i = 1, 10 end do @@ -394,9 +507,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(target: .true.) do i = 1, 10 end do @@ -407,6 +524,10 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(parallel: .true.) do i = 1, 10 end do @@ -424,6 +545,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd do i = 1, 10 end do @@ -431,9 +555,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(.true.) do i = 1, 10 end do @@ -441,9 +572,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & !$omp& if(simd: .true.) do i = 1, 10 @@ -452,12 +590,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(target: .true.) do i = 1, 10 end do @@ -468,7 +610,13 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -489,22 +637,27 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel if(.true.) i = 1 !$omp end target parallel ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel if(target: .true.) if(parallel: .false.) i = 1 !$omp end target parallel ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -517,6 +670,7 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel if(parallel: .true.) i = 1 !$omp end target parallel @@ -537,8 +691,10 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target simd if(.true.) 
do i = 1, 10 end do @@ -546,8 +702,10 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target simd if(target: .true.) if(simd: .false.) do i = 1, 10 end do @@ -555,6 +713,7 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -568,6 +727,7 @@ program main ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target simd if(simd: .true.) do i = 1, 10 end do @@ -592,8 +752,10 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -604,8 +766,10 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -616,6 +780,7 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -632,6 +797,7 @@ program main ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -655,6 +821,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do do i = 1, 10 end do @@ -662,13 +831,19 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do if(.true.) do i = 1, 10 end do @@ -676,13 +851,19 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do if(target: .true.) if(teams: .false.) if(parallel: .true.) do i = 1, 10 end do @@ -690,6 +871,7 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -699,6 +881,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do if(target: .true.) do i = 1, 10 end do @@ -709,12 +894,16 @@ program main ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do if(teams: .true.) do i = 1, 10 end do @@ -731,6 +920,10 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do if(parallel: .true.) 
do i = 1, 10 end do @@ -761,14 +954,19 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do simd if(.true.) do i = 1, 10 end do @@ -776,14 +974,19 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do simd if(target: .true.) if(teams: .false.) if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -791,6 +994,7 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -813,6 +1017,7 @@ program main ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -838,6 +1043,7 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -859,6 +1065,8 @@ program main ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute parallel do simd if(simd: .true.) do i = 1, 10 end do @@ -876,7 +1084,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { !$omp target teams distribute simd @@ -886,13 +1094,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute simd if(.true.) do i = 1, 10 end do @@ -900,13 +1111,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target teams distribute simd if(target: .true.) if(teams: .false.) if(simd: .false.) do i = 1, 10 end do @@ -914,13 +1128,14 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { !$omp target teams distribute simd if(target: .true.) @@ -933,10 +1148,11 @@ program main ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { !$omp target teams distribute simd if(teams: .true.) @@ -953,8 +1169,9 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! 
CHECK-SAME: { !$omp target teams distribute simd if(simd: .true.) do i = 1, 10 end do @@ -962,7 +1179,7 @@ program main ! ---------------------------------------------------------------------------- ! TARGET TEAMS - ! ---------------------------------------------------------------------------- + ! ---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -975,22 +1192,27 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target teams if(.true.) i = 1 !$omp end target teams ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target teams if(target: .true.) if(teams: .false.) i = 1 !$omp end target teams ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1003,10 +1225,27 @@ program main ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target teams if(teams: .true.) i = 1 !$omp end target teams + ! ---------------------------------------------------------------------------- + ! TARGET UPDATE + ! ---------------------------------------------------------------------------- + + ! CHECK: omp.target_update + ! CHECK-NOT: if({{.*}}) + !$omp target update to(i) + + ! CHECK: omp.target_update + ! CHECK-SAME: if({{.*}}) + !$omp target update to(i) if(.true.) + + ! CHECK: omp.target_update + ! CHECK-SAME: if({{.*}}) + !$omp target update to(i) if(target update: .true.) + ! ---------------------------------------------------------------------------- ! TASK ! ---------------------------------------------------------------------------- @@ -1018,11 +1257,13 @@ program main ! CHECK: omp.task ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp task if(.true.) !$omp end task ! CHECK: omp.task ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp task if(task: .true.) !$omp end task @@ -1042,6 +1283,7 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1052,6 +1294,7 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1072,6 +1315,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do do i = 1, 10 end do @@ -1079,11 +1325,16 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do if(.true.) do i = 1, 10 end do @@ -1091,11 +1342,16 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do if(teams: .true.) if(parallel: .false.) do i = 1, 10 end do @@ -1103,12 +1359,16 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! 
CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do if(teams: .true.) do i = 1, 10 end do @@ -1122,6 +1382,10 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do if(parallel: .true.) do i = 1, 10 end do @@ -1142,6 +1406,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do simd do i = 1, 10 end do @@ -1149,12 +1416,19 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do simd if(.true.) do i = 1, 10 end do @@ -1162,12 +1436,19 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do simd if(teams: .false.) if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -1175,6 +1456,7 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -1184,6 +1466,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do simd if(teams: .true.) do i = 1, 10 end do @@ -1200,6 +1485,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do simd if(parallel: .true.) do i = 1, 10 end do @@ -1215,6 +1503,11 @@ program main ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute parallel do simd if(simd: .true.) do i = 1, 10 end do @@ -1229,7 +1522,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { !$omp teams distribute simd @@ -1239,11 +1532,13 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute simd if(.true.) do i = 1, 10 end do @@ -1251,11 +1546,13 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp teams distribute simd if(teams: .true.) if(simd: .false.) do i = 1, 10 end do @@ -1263,10 +1560,11 @@ program main ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simdloop + ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! 
CHECK-SAME: {
   !$omp teams distribute simd if(teams: .true.)
@@ -1280,8 +1578,9 @@ program main
   ! CHECK: omp.distribute
   ! CHECK-NOT: if({{.*}})
   ! CHECK-SAME: {
-  ! CHECK: omp.simdloop
+  ! CHECK: omp.simd
   ! CHECK-SAME: if({{.*}})
+  ! CHECK-SAME: {
   !$omp teams distribute simd if(simd: .true.)
   do i = 1, 10
   end do
@@ -1299,12 +1598,14 @@ program main
   ! CHECK: omp.teams
   ! CHECK-SAME: if({{.*}})
+  ! CHECK-SAME: {
   !$omp teams if(.true.)
   i = 1
   !$omp end teams
 
   ! CHECK: omp.teams
   ! CHECK-SAME: if({{.*}})
+  ! CHECK-SAME: {
   !$omp teams if(teams: .true.)
   i = 1
   !$omp end teams
diff --git a/flang/test/Lower/OpenMP/FIR/loop-combined.f90 b/flang/test/Lower/OpenMP/FIR/loop-combined.f90
index 31ba1772651366..0f887fdf68629d 100644
--- a/flang/test/Lower/OpenMP/FIR/loop-combined.f90
+++ b/flang/test/Lower/OpenMP/FIR/loop-combined.f90
@@ -44,7 +44,7 @@ program main
   !$omp teams
 
   ! CHECK: omp.distribute
-  ! CHECK: omp.simdloop
+  ! CHECK: omp.simd
   !$omp distribute simd
   do i = 1, 10
   end do
@@ -138,7 +138,7 @@ program main
   ! CHECK: omp.target
   ! CHECK: omp.teams
   ! CHECK: omp.distribute
-  ! CHECK: omp.simdloop
+  ! CHECK: omp.simd
   !$omp target teams distribute simd
   do i = 1, 10
   end do
@@ -151,7 +151,6 @@ program main
   ! CHECK: omp.target
   ! CHECK: omp.teams
   ! CHECK: omp.distribute
-  ! CHECK: omp.wsloop
   !$omp target teams distribute
   do i = 1, 10
   end do
@@ -199,7 +198,7 @@ program main
 
   ! CHECK: omp.teams
   ! CHECK: omp.distribute
-  ! CHECK: omp.simdloop
+  ! CHECK: omp.simd
   !$omp teams distribute simd
   do i = 1, 10
   end do
@@ -211,7 +210,6 @@ program main
 
   ! CHECK: omp.teams
   ! CHECK: omp.distribute
-  ! CHECK: omp.wsloop
   !$omp teams distribute
   do i = 1, 10
   end do
diff --git a/flang/test/Lower/OpenMP/FIR/simd.f90 b/flang/test/Lower/OpenMP/FIR/simd.f90
index db7d30295c45d9..c646451c92b8c5 100644
--- a/flang/test/Lower/OpenMP/FIR/simd.f90
+++ b/flang/test/Lower/OpenMP/FIR/simd.f90
@@ -24,10 +24,10 @@ subroutine simd
 subroutine simd_with_if_clause(n, threshold)
   integer :: i, n, threshold
   !$OMP SIMD IF( n .GE. threshold )
+  ! CHECK: %[[COND:.*]] = arith.cmpi sge
   ! CHECK: %[[LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[UB:.*]] = fir.load %arg0
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: %[[COND:.*]] = arith.cmpi sge
   ! CHECK: omp.simd if(%[[COND:.*]]) {
   ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   do i = 1, n
diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
index e4b85fb447767f..08e74d6cb75b70 100644
--- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90
@@ -15,12 +15,12 @@ program wsloop
 do i=1, 9
   print*, i
 
-! CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32
-! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32
-! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait {
-! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) {
+! CHECK: %[[VAL_2:.*]] = arith.constant 4 : i32
+! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_4:.*]] = arith.constant 9 : i32
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop schedule(static = %[[VAL_2]] : i32) nowait {
+! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) {
 ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref<i32>
 ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref<i32>
 ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref<i8>, i32) -> i1
@@ -36,12 +36,12 @@ program wsloop
 do i=1, 9
   print*, i*2
 
-! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32
-! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32
-! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait {
-! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) {
+! CHECK: %[[VAL_14:.*]] = arith.constant 4 : i32
+! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_16:.*]] = arith.constant 9 : i32
+! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop schedule(static = %[[VAL_14]] : i32) nowait {
+! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) {
 ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref<i32>
 ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32
 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref<i32>
@@ -63,12 +63,12 @@ program wsloop
 !$OMP END DO NOWAIT
 ! CHECK: %[[VAL_28:.*]] = arith.constant 6 : i32
 ! CHECK: fir.store %[[VAL_28]] to %[[VAL_0]] : !fir.ref<i32>
-! CHECK: %[[VAL_29:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32
-! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
-! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait {
-! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) {
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_31:.*]] = arith.constant 9 : i32
+! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop schedule(static = %[[VAL_29]] : i32) nowait {
+! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_30]]) to (%[[VAL_31]]) inclusive step (%[[VAL_32]]) {
 ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref<i32>
 ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32
 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90
index b62c54182442ac..531d604cedac41 100644
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90
+++ b/flang/test/Lower/OpenMP/Todo/omp-do-simd-aligned.f90
@@ -1,11 +1,11 @@
 ! This test checks lowering of OpenMP do simd aligned() pragma
 
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
 subroutine testDoSimdAligned(int_array)
   use iso_c_binding
   type(c_ptr) :: int_array
-!CHECK: not yet implemented: Unhandled clause ALIGNED in DO SIMD construct
+!CHECK: not yet implemented: Unhandled clause ALIGNED in SIMD construct
 !$omp do simd aligned(int_array)
   do index_ = 1, 10
     call c_test_call(int_array)
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
index a9e0446ec8c34e..2f5366c2a5b368 100644
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
+++ b/flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90
@@ -4,7 +4,7 @@
 ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 subroutine testDoSimdLinear(int_array)
   integer :: int_array(*)
-!CHECK: not yet implemented: Unhandled clause LINEAR in DO SIMD construct
+!CHECK: not yet implemented: Unhandled clause LINEAR in DO construct
 !$omp do simd linear(int_array)
   do index_ = 1, 10
   end do
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90
deleted file mode 100644
index 054eb52ea170ac..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-safelen.f90
+++ /dev/null
@@ -1,14 +0,0 @@
-! This test checks lowering of OpenMP do simd safelen() pragma
-
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-subroutine testDoSimdSafelen(int_array)
-  integer :: int_array(*)
-!CHECK: not yet implemented: Unhandled clause SAFELEN in DO SIMD construct
-!$omp do simd safelen(4)
-  do index_ = 1, 10
-  end do
-!$omp end do simd
-
-end subroutine testDoSimdSafelen
-
diff --git a/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90 b/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90
deleted file mode 100644
index bd00b6f336c931..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/omp-do-simd-simdlen.f90
+++ /dev/null
@@ -1,14 +0,0 @@
-! This test checks lowering of OpenMP do simd simdlen() pragma
-
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-subroutine testDoSimdSimdlen(int_array)
-  integer :: int_array(*)
-!CHECK: not yet implemented: Unhandled clause SIMDLEN in DO SIMD construct
-!$omp do simd simdlen(4)
-  do index_ = 1, 10
-  end do
-!$omp end do simd
-
-end subroutine testDoSimdSimdlen
-
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
index 49d1142ea4b6aa..1e819b7a4b3ded 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction-byref.f90
@@ -18,12 +18,12 @@ subroutine red_and_delayed_private
   !$omp end parallel
 end subroutine
 
-! CHECK-LABEL: omp.private {type = private}
-! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
-
 ! CHECK-LABEL: omp.declare_reduction
 ! CHECK-SAME: @[[REDUCTION_SYM:.*]] : !fir.ref<i32> init
 
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+
 ! CHECK-LABEL: _QPred_and_delayed_private
 ! CHECK: omp.parallel
 ! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref<i32>)
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90
index d814b2b0ff0f31..cd8ce132213dda 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90
@@ -21,12 +21,12 @@ subroutine red_and_delayed_private
   !$omp end parallel
 end subroutine
 
-! CHECK-LABEL: omp.private {type = private}
-! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
-
 ! CHECK-LABEL: omp.declare_reduction
 ! CHECK-SAME: @[[REDUCTION_SYM:.*]] : i32 init
 
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref<i32> alloc {
+
 ! CHECK-LABEL: _QPred_and_delayed_private
 ! CHECK: omp.parallel
 !
CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref) diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index ce4427a0c2cab2..bfb785179bc56a 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -7,23 +7,191 @@ program main integer :: i ! TODO When they are supported, add tests for: - ! - DISTRIBUTE PARALLEL DO - ! - DISTRIBUTE PARALLEL DO SIMD - ! - DISTRIBUTE SIMD ! - PARALLEL SECTIONS ! - PARALLEL WORKSHARE - ! - TARGET PARALLEL - ! - TARGET TEAMS DISTRIBUTE - ! - TARGET TEAMS DISTRIBUTE PARALLEL DO - ! - TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD - ! - TARGET TEAMS DISTRIBUTE SIMD - ! - TARGET UPDATE ! - TASKLOOP ! - TASKLOOP SIMD - ! - TEAMS DISTRIBUTE - ! - TEAMS DISTRIBUTE PARALLEL DO - ! - TEAMS DISTRIBUTE PARALLEL DO SIMD - ! - TEAMS DISTRIBUTE SIMD + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp teams + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do simd + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do simd if(parallel: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp teams + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do + do i = 1, 10 + end do + !$omp end distribute parallel do + + ! 
CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !$omp teams + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute simd + do i = 1, 10 + end do + !$omp end distribute simd + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end distribute simd + + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp distribute simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !$omp end teams ! ---------------------------------------------------------------------------- ! DO SIMD @@ -31,18 +199,31 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp do simd do i = 1, 10 end do !$omp end do simd ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp do simd if(.true.) do i = 1, 10 end do !$omp end do simd ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp do simd if(simd: .true.) do i = 1, 10 end do @@ -60,12 +241,14 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel if(.true.) i = 10 !$omp end parallel ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel if(parallel: .true.) i = 10 !$omp end parallel @@ -76,6 +259,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do do i = 1, 10 end do @@ -83,6 +269,10 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do if(.true.) do i = 1, 10 end do @@ -90,6 +280,10 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do if(parallel: .true.) do i = 1, 10 end do @@ -104,6 +298,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd do i = 1, 10 end do @@ -111,7 +308,13 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! 
CHECK-SAME: { !$omp parallel do simd if(.true.) do i = 1, 10 end do @@ -119,7 +322,13 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -130,6 +339,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(parallel: .true.) do i = 1, 10 end do @@ -139,6 +351,11 @@ program main ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp parallel do simd if(simd: .true.) do i = 1, 10 end do @@ -157,6 +374,7 @@ program main ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp simd if(.true.) do i = 1, 10 end do @@ -164,6 +382,7 @@ program main ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp simd if(simd: .true.) do i = 1, 10 end do @@ -180,11 +399,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target if(.true.) !$omp end target ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target if(target: .true.) !$omp end target @@ -199,11 +420,13 @@ program main ! CHECK: omp.target_data ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target data map(tofrom: i) if(.true.) !$omp end target data ! CHECK: omp.target_data ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target data map(tofrom: i) if(target data: .true.) !$omp end target data @@ -212,7 +435,6 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target_enter_data ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: map !$omp target enter data map(to: i) ! CHECK: omp.target_enter_data @@ -228,7 +450,6 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target_exit_data ! CHECK-NOT: if({{.*}}) - ! CHECK-SAME: map !$omp target exit data map(from: i) ! CHECK: omp.target_exit_data @@ -248,6 +469,9 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do do i = 1, 10 end do @@ -255,8 +479,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(.true.) do i = 1, 10 end do @@ -264,8 +493,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(target: .true.) if(parallel: .false.) do i = 1, 10 end do @@ -273,9 +507,13 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(target: .true.) do i = 1, 10 end do @@ -286,6 +524,10 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do if(parallel: .true.) 
do i = 1, 10 end do @@ -303,6 +545,9 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd do i = 1, 10 end do @@ -310,9 +555,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(.true.) do i = 1, 10 end do @@ -320,9 +572,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & !$omp& if(simd: .true.) do i = 1, 10 @@ -331,12 +590,16 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(target: .true.) do i = 1, 10 end do @@ -347,30 +610,91 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do !$omp end target parallel do simd ! ---------------------------------------------------------------------------- - ! TARGET SIMD + ! TARGET PARALLEL ! ---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - ! CHECK: omp.simd + ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - !$omp target simd - do i = 1, 10 - end do - !$omp end target simd + !$omp target parallel + i = 1 + !$omp end target parallel ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target parallel if(.true.) + i = 1 + !$omp end target parallel + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target parallel if(target: .true.) if(parallel: .false.) + i = 1 + !$omp end target parallel + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target parallel if(target: .true.) + i = 1 + !$omp end target parallel + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target parallel if(parallel: .true.) + i = 1 + !$omp end target parallel + + ! ---------------------------------------------------------------------------- + ! TARGET SIMD + ! ---------------------------------------------------------------------------- + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target simd + do i = 1, 10 + end do + !$omp end target simd + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! 
CHECK-SAME: { !$omp target simd if(.true.) do i = 1, 10 end do @@ -378,8 +702,10 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target simd if(target: .true.) if(simd: .false.) do i = 1, 10 end do @@ -387,6 +713,7 @@ program main ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { @@ -400,96 +727,885 @@ program main ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp target simd if(simd: .true.) do i = 1, 10 end do !$omp end target simd ! ---------------------------------------------------------------------------- - ! TARGET TEAMS + ! TARGET TEAMS DISTRIBUTE ! ---------------------------------------------------------------------------- - ! CHECK: omp.target ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - !$omp target teams - i = 1 - !$omp end target teams + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute + do i = 1, 10 + end do + !$omp end target teams distribute ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) - !$omp target teams if(.true.) - i = 1 - !$omp end target teams + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) - !$omp target teams if(target: .true.) if(teams: .false.) - i = 1 - !$omp end target teams + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute if(target: .true.) if(teams: .false.) + do i = 1, 10 + end do + !$omp end target teams distribute ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - !$omp target teams if(target: .true.) - i = 1 - !$omp end target teams + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute if(target: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute ! CHECK: omp.target ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) - !$omp target teams if(teams: .true.) - i = 1 - !$omp end target teams + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute if(teams: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute ! ---------------------------------------------------------------------------- - ! TASK + ! TARGET TEAMS DISTRIBUTE PARALLEL DO ! ---------------------------------------------------------------------------- - ! CHECK: omp.task + ! CHECK: omp.target ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - !$omp task - !$omp end task + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do + do i = 1, 10 + end do + !$omp end target teams distribute parallel do - ! CHECK: omp.task + ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) - !$omp task if(.true.) - !$omp end task + ! CHECK-SAME: { + ! 
CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do - ! CHECK: omp.task + ! CHECK: omp.target ! CHECK-SAME: if({{.*}}) - !$omp task if(task: .true.) - !$omp end task + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do if(target: .true.) if(teams: .false.) if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do if(target: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do if(teams: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do ! ---------------------------------------------------------------------------- - ! TEAMS + ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD ! ---------------------------------------------------------------------------- + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { - !$omp teams - i = 1 - !$omp end teams + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) - !$omp teams if(.true.) - i = 1 - !$omp end teams + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd if(.true.) 
+ do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { ! CHECK: omp.teams ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd if(target: .true.) if(teams: .false.) if(parallel: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd if(teams: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute parallel do simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute simd + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! 
CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute simd if(target: .true.) if(teams: .false.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute simd if(target: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute simd if(teams: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams distribute simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS + ! ---------------------------------------------------------------------------- + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams + i = 1 + !$omp end target teams + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams if(.true.) + i = 1 + !$omp end target teams + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams if(target: .true.) if(teams: .false.) + i = 1 + !$omp end target teams + + ! CHECK: omp.target + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams if(target: .true.) + i = 1 + !$omp end target teams + + ! CHECK: omp.target + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp target teams if(teams: .true.) + i = 1 + !$omp end target teams + + ! ---------------------------------------------------------------------------- + ! TARGET UPDATE + ! ---------------------------------------------------------------------------- + + ! CHECK: omp.target_update + ! CHECK-NOT: if({{.*}}) + !$omp target update to(i) + + ! CHECK: omp.target_update + ! CHECK-SAME: if({{.*}}) + !$omp target update to(i) if(.true.) + + ! CHECK: omp.target_update + ! CHECK-SAME: if({{.*}}) + !$omp target update to(i) if(target update: .true.) + + ! ---------------------------------------------------------------------------- + ! TASK + ! ---------------------------------------------------------------------------- + ! CHECK: omp.task + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp task + !$omp end task + + ! CHECK: omp.task + ! CHECK-SAME: if({{.*}}) + ! 
CHECK-SAME: { + !$omp task if(.true.) + !$omp end task + + ! CHECK: omp.task + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp task if(task: .true.) + !$omp end task + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE + ! ---------------------------------------------------------------------------- + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute + do i = 1, 10 + end do + !$omp end teams distribute + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do if(teams: .true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! 
CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do simd + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do simd if(teams: .false.) if(parallel: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do simd if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-SAME: if({{.*}}) + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.parallel + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute parallel do simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute simd + do i = 1, 10 + end do + !$omp end teams distribute simd + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! 
CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute simd if(teams: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute simd if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.distribute + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK: omp.simd + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams distribute simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + ! ---------------------------------------------------------------------------- + ! TEAMS + ! ---------------------------------------------------------------------------- + ! CHECK: omp.teams + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + !$omp teams + i = 1 + !$omp end teams + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { + !$omp teams if(.true.) + i = 1 + !$omp end teams + + ! CHECK: omp.teams + ! CHECK-SAME: if({{.*}}) + ! CHECK-SAME: { !$omp teams if(teams: .true.) i = 1 !$omp end teams diff --git a/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 b/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 new file mode 100644 index 00000000000000..737e182c8781c9 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-do-simd-safelen.f90 @@ -0,0 +1,16 @@ +! This test checks lowering of OpenMP do simd safelen() pragma + +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s +subroutine testDoSimdSafelen(int_array) + integer :: int_array(*) + + !CHECK: omp.wsloop { + !CHECK: omp.simd safelen(4) { + !CHECK: omp.loop_nest {{.*}} { + !$omp do simd safelen(4) + do index_ = 1, 10 + end do + !$omp end do simd + +end subroutine testDoSimdSafelen diff --git a/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 b/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 new file mode 100644 index 00000000000000..8c7c771ded643f --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-do-simd-simdlen.f90 @@ -0,0 +1,16 @@ +! This test checks lowering of OpenMP do simd simdlen() pragma + +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s +subroutine testDoSimdSimdlen(int_array) + integer :: int_array(*) + + !CHECK: omp.wsloop { + !CHECK: omp.simd simdlen(4) { + !CHECK: omp.loop_nest {{.*}} { + !$omp do simd simdlen(4) + do index_ = 1, 10 + end do + !$omp end do simd + +end subroutine testDoSimdSimdlen diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90 index 2a4e338f255ebb..14bd0d9a24572a 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction3.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90 @@ -69,13 +69,13 @@ ! CHECK: omp.parallel { ! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 -! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_19:.*]] = fir.alloca !fir.box> -! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_19]] : !fir.ref>> -! 
CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_Uxi32 %[[VAL_19]] -> %[[VAL_20:.*]] : !fir.ref>>) { -! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref>> +! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref>>) { +! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) { ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref>> diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 190aa615212176..4002f041bd0148 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -26,10 +26,10 @@ subroutine simd_with_if_clause(n, threshold) ! CHECK: %[[ARG_N:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsimd_with_if_clauseEn"} : (!fir.ref) -> (!fir.ref, !fir.ref) integer :: i, n, threshold !$OMP SIMD IF( n .GE. threshold ) + ! CHECK: %[[COND:.*]] = arith.cmpi sge ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0 ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: %[[COND:.*]] = arith.cmpi sge ! CHECK: omp.simd if(%[[COND:.*]]) { ! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90 index fa6ec219a490eb..3d4e9bc505f281 100644 --- a/flang/test/Lower/OpenMP/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90 @@ -16,12 +16,12 @@ program wsloop do i=1, 9 print*, i -! CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 -! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { -! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: %[[VAL_2:.*]] = arith.constant 4 : i32 +! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_4:.*]] = arith.constant 9 : i32 +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop schedule(static = %[[VAL_2]] : i32) nowait { +! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -37,12 +37,12 @@ program wsloop do i=1, 9 print*, i*2 -! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 -! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { -! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: %[[VAL_14:.*]] = arith.constant 4 : i32 +! 
CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_16:.*]] = arith.constant 9 : i32 +! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop schedule(static = %[[VAL_14]] : i32) nowait { +! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref @@ -64,12 +64,12 @@ program wsloop !$OMP END DO NOWAIT ! CHECK: %[[VAL_28:.*]] = arith.constant 6 : i32 ! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_0]]#0 : i32, !fir.ref -! CHECK: %[[VAL_29:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 -! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { -! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref +! CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_31:.*]] = arith.constant 9 : i32 +! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop schedule(static = %[[VAL_29]] : i32) nowait { +! CHECK-NEXT: omp.loop_nest (%[[ARG2:.*]]) : i32 = (%[[VAL_30]]) to (%[[VAL_31]]) inclusive step (%[[VAL_32]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 index 6c9bc75b81d700..763ea6a6268a58 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 @@ -79,13 +79,13 @@ subroutine reduce(r) ! CHECK: omp.parallel { ! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 -! CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 -! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> -! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_9]] : !fir.ref>> -! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_Uxf64 %[[VAL_9]] -> %[[VAL_10:.*]] : !fir.ref>>) { -! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref>> +! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_Uxf64 %[[VAL_6]] -> %[[VAL_10:.*]] : !fir.ref>>) { +! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 index 7aecf870cf800a..a483033abcf95f 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 @@ -73,14 +73,14 @@ program reduce ! CHECK: omp.parallel { ! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 -! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 -! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> -! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> -! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) { -! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref>> +! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref>>) { +! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 index d1d8a2c599444a..b8cb326815484e 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 @@ -73,14 +73,14 @@ program reduce ! CHECK: omp.parallel { ! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 -! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 -! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> -! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> -! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> -! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) { -! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref>> +! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 +! 
CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref>>) { +! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref>> diff --git a/flang/test/Transforms/DoConcurrent/basic.f90 b/flang/test/Transforms/DoConcurrent/basic.f90 index a555a25c9bad5d..15faddb4f17fe1 100644 --- a/flang/test/Transforms/DoConcurrent/basic.f90 +++ b/flang/test/Transforms/DoConcurrent/basic.f90 @@ -23,7 +23,8 @@ program do_concurrent_basic ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index ! CHECK: %[[STEP:.*]] = arith.constant 1 : index - ! CHECK: omp.wsloop for (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK: omp.wsloop { + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref @@ -33,6 +34,8 @@ program do_concurrent_basic ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref ! CHECK-NEXT: omp.yield ! CHECK-NEXT: } + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } ! CHECK-NEXT: omp.terminator ! CHECK-NEXT: } diff --git a/flang/test/Transforms/DoConcurrent/basic.mlir b/flang/test/Transforms/DoConcurrent/basic.mlir index 7d62463f36d422..764e62b647f913 100644 --- a/flang/test/Transforms/DoConcurrent/basic.mlir +++ b/flang/test/Transforms/DoConcurrent/basic.mlir @@ -31,7 +31,8 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas // CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index // CHECK: %[[STEP:.*]] = arith.constant 1 : index - // CHECK: omp.wsloop for (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { // CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 // CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref // CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref @@ -41,6 +42,8 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas // CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref // CHECK-NEXT: omp.yield // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } // CHECK-NEXT: omp.terminator // CHECK-NEXT: } diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 74bf1da4c77255..439496d8da602e 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -602,7 +602,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", [SameVariadicOperandSize, def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, RecursiveMemoryEffects, ReductionClauseInterface, - SingleBlockImplicitTerminator<"TerminatorOp">]> { + SingleBlock]> { let summary = "worksharing-loop construct"; let description = [{ The worksharing-loop construct specifies that the iterations of the loop(s) @@ -715,8 +715,7 @@ def WsloopOp : OpenMP_Op<"wsloop", 
[AttrSizedOperandSegments, def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, - RecursiveMemoryEffects, - SingleBlockImplicitTerminator<"TerminatorOp">]> { + RecursiveMemoryEffects, SingleBlock]> { let summary = "simd construct"; let description = [{ The simd construct can be applied to a loop to indicate that the loop can be @@ -820,8 +819,7 @@ def YieldOp : OpenMP_Op<"yield", //===----------------------------------------------------------------------===// def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments, DeclareOpInterfaceMethods, - RecursiveMemoryEffects, - SingleBlockImplicitTerminator<"TerminatorOp">]> { + RecursiveMemoryEffects, SingleBlock]> { let summary = "distribute construct"; let description = [{ The distribute construct specifies that the iterations of one or more loops @@ -1014,7 +1012,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments, AutomaticAllocationScope, DeclareOpInterfaceMethods, RecursiveMemoryEffects, ReductionClauseInterface, - SingleBlockImplicitTerminator<"TerminatorOp">]> { + SingleBlock]> { let summary = "taskloop construct"; let description = [{ The taskloop construct specifies that the iterations of one or more diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td index ab9b78e755d9d5..126b534ab1abd9 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -102,8 +102,14 @@ def LoopWrapperInterface : OpInterface<"LoopWrapperInterface"> { Operation &firstOp = *r.op_begin(); Operation &secondOp = *(std::next(r.op_begin())); - return ::llvm::isa(firstOp) && - secondOp.hasTrait(); + + if (!secondOp.hasTrait()) + return false; + + if (auto wrapper = ::llvm::dyn_cast(firstOp)) + return wrapper.isWrapper(); + + return ::llvm::isa(firstOp); }] >, InterfaceMethod< diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 057a0282f39e03..576b798aeef7df 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1385,10 +1385,14 @@ Operation *TargetOp::getInnermostCapturedOmpOp() { bool TargetOp::isTargetSPMDLoop() { Operation *capturedOp = getInnermostCapturedOmpOp(); - if (!isa_and_present(capturedOp)) + if (!isa_and_present(capturedOp)) return false; - Operation *parallelOp = capturedOp->getParentOp(); + Operation *workshareOp = capturedOp->getParentOp(); + if (!isa_and_present(workshareOp)) + return false; + + Operation *parallelOp = workshareOp->getParentOp(); if (!isa_and_present(parallelOp)) return false; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 9688dc15f48730..ab83dd3ab0d3c6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -259,6 +259,143 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { llvm_unreachable("Unknown ClauseProcBindKind kind"); } +/// Populate a set of previously created llvm.alloca instructions that are only +/// used inside of the given region but defined outside of it. Allocations of +/// non-primitive types are skipped by this function. 
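+/// For example (an illustrative sketch, not taken from the sources), given
+/// input along these lines:
+///   %x = llvm.alloca ...                   // defined outside the region
+///   omp.simd { ... llvm.store %v, %x ... } // %x only used inside it
+/// the llvm.alloca instruction that %x is mapped to is collected, so that
+/// lifetime markers can later be emitted around the lowered loop body.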
+static void getSinkableAllocas(LLVM::ModuleTranslation &moduleTranslation, + Region ®ion, + SetVector &allocasToSink) { + Operation *op = region.getParentOp(); + + for (auto storeOp : region.getOps()) { + Value storeAddr = storeOp.getAddr(); + Operation *addrOp = storeAddr.getDefiningOp(); + + // The destination address is already defined in this region or it is not an + // llvm.alloca operation, so skip it. + if (!isa_and_present(addrOp) || op->isAncestor(addrOp)) + continue; + + // Get LLVM value to which the address is mapped. It has to be mapped to the + // allocation instruction of a scalar type to be marked as sinkable by this + // function. + llvm::Value *llvmAddr = moduleTranslation.lookupValue(storeAddr); + if (!isa_and_present(llvmAddr)) + continue; + + auto *llvmAlloca = cast(llvmAddr); + if (llvmAlloca->getAllocatedType()->getPrimitiveSizeInBits() == 0) + continue; + + // Check that the address is only used inside of the region. + bool addressUsedOnlyInternally = true; + for (auto &addrUse : storeAddr.getUses()) { + if (!op->isAncestor(addrUse.getOwner())) { + addressUsedOnlyInternally = false; + break; + } + } + + if (!addressUsedOnlyInternally) + continue; + + allocasToSink.insert(llvmAlloca); + } +} + +// TODO: Make this a top-level conversion function (i.e. part of the switch +// statement in `convertHostOrTargetOperation`) independent from parent +// worksharing operations and update `convertOmpWsloop` to rely on this rather +// than replicating the same logic. +static std::optional< + std::tuple> +convertLoopNestHelper(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + StringRef blockName) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + auto loopOp = cast(opInst); + + // Set up the source location value for OpenMP runtime. + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + + SetVector allocasToSink; + getSinkableAllocas(moduleTranslation, loopOp.getRegion(), allocasToSink); + + // Generator of the canonical loop body. + // TODO: support error propagation in OpenMPIRBuilder and use it instead of + // relying on captured variables. + SmallVector loopInfos; + SmallVector bodyInsertPoints; + LogicalResult bodyGenStatus = success(); + auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { + // Make sure further conversions know about the induction variable. + moduleTranslation.mapValue( + loopOp.getRegion().front().getArgument(loopInfos.size()), iv); + + // Capture the body insertion point for use in nested loops. BodyIP of the + // CanonicalLoopInfo always points to the beginning of the entry block of + // the body. + bodyInsertPoints.push_back(ip); + + if (loopInfos.size() != loopOp.getNumLoops() - 1) + return; + + // Convert the body of the loop, adding lifetime markers to allocations that + // can be sunk into the new block. + builder.restoreIP(ip); + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeStart(alloca, builder.getInt64(size)); + } + llvm::BasicBlock *cont = + convertOmpOpRegions(loopOp.getRegion(), blockName, builder, + moduleTranslation, bodyGenStatus); + builder.SetInsertPoint(cont, cont->begin()); + for (auto *alloca : allocasToSink) { + unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; + builder.CreateLifetimeEnd(alloca, builder.getInt64(size)); + } + }; + + // Delegate actual loop construction to the OpenMP IRBuilder. 
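+  // For instance, a two-level omp.loop_nest yields two canonical loops here,
+  // which are then collapsed into a single loop once the whole nest has been
+  // built.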
+  // TODO: this currently assumes omp.loop_nest is semantically similar to SCF
+  // loop, i.e. it has a positive step, uses signed integer semantics.
+  // Reconsider this code when the nested loop operation clearly supports more
+  // cases.
+  for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) {
+    llvm::Value *lowerBound =
+        moduleTranslation.lookupValue(loopOp.getLowerBound()[i]);
+    llvm::Value *upperBound =
+        moduleTranslation.lookupValue(loopOp.getUpperBound()[i]);
+    llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]);
+
+    // Make sure loop trip counts are emitted in the preheader of the outermost
+    // loop at the latest so that they are all available for the new collapsed
+    // loop that will be created below.
+    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
+    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
+    if (i != 0) {
+      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
+                                                       ompLoc.DL);
+      computeIP = loopInfos.front()->getPreheaderIP();
+    }
+    loopInfos.push_back(ompBuilder->createCanonicalLoop(
+        loc, bodyGen, lowerBound, upperBound, step,
+        /*IsSigned=*/true, loopOp.getInclusive(), computeIP));
+
+    if (failed(bodyGenStatus))
+      return std::nullopt;
+  }
+
+  // Collapse loops. Store the insertion point because LoopInfos may get
+  // invalidated.
+  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
+  llvm::CanonicalLoopInfo *loopInfo =
+      ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
+
+  return std::make_tuple(ompLoc, afterIP, loopInfo);
+}
+
 /// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -886,50 +1023,6 @@ static void collectReductionInfo(
   }
 }
 
-/// Populate a set of previously created llvm.alloca instructions that are only
-/// used inside of the given region but defined outside of it. Allocations of
-/// non-primitive types are skipped by this function.
-static void getSinkableAllocas(LLVM::ModuleTranslation &moduleTranslation,
-                               Region &region,
-                               SetVector &allocasToSink) {
-  Operation *op = region.getParentOp();
-
-  for (auto storeOp : region.getOps()) {
-    Value storeAddr = storeOp.getAddr();
-    Operation *addrOp = storeAddr.getDefiningOp();
-
-    // The destination address is already defined in this region or it is not an
-    // llvm.alloca operation, so skip it.
-    if (!isa_and_present(addrOp) || op->isAncestor(addrOp))
-      continue;
-
-    // Get LLVM value to which the address is mapped. It has to be mapped to the
-    // allocation instruction of a scalar type to be marked as sinkable by this
-    // function.
-    llvm::Value *llvmAddr = moduleTranslation.lookupValue(storeAddr);
-    if (!isa_and_present(llvmAddr))
-      continue;
-
-    auto *llvmAlloca = cast(llvmAddr);
-    if (llvmAlloca->getAllocatedType()->getPrimitiveSizeInBits() == 0)
-      continue;
-
-    // Check that the address is only used inside of the region.
- bool addressUsedOnlyInternally = true; - for (auto &addrUse : storeAddr.getUses()) { - if (!op->isAncestor(addrUse.getOwner())) { - addressUsedOnlyInternally = false; - break; - } - } - - if (!addressUsedOnlyInternally) - continue; - - allocasToSink.insert(llvmAlloca); - } -} - /// handling of DeclareReductionOp's cleanup region static LogicalResult inlineReductionCleanup( llvm::SmallVectorImpl &reductionDecls, @@ -974,6 +1067,7 @@ static LogicalResult convertOmpWsloop( SmallVector &owningAtomicReductionGens, SmallVector &reductionInfos) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + // FIXME: This ignores any other nested wrappers (e.g. omp.simd). auto wsloopOp = cast(opInst); auto loopOp = cast(wsloopOp.getWrappedLoop()); const bool isByRef = wsloopOp.getByref(); @@ -1053,7 +1147,7 @@ static LogicalResult convertOmpWsloop( llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); SetVector allocasToSink; - getSinkableAllocas(moduleTranslation, loop.getRegion(), allocasToSink); + getSinkableAllocas(moduleTranslation, loopOp.getRegion(), allocasToSink); // Generator of the canonical loop body. // TODO: support error propagation in OpenMPIRBuilder and use it instead of @@ -1500,85 +1594,16 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, static LogicalResult convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto simdOp = cast(opInst); auto loopOp = cast(simdOp.getWrappedLoop()); - llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - - SetVector allocasToSink; - getSinkableAllocas(moduleTranslation, loop.getRegion(), allocasToSink); - - // Generator of the canonical loop body. - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. - SmallVector loopInfos; - SmallVector bodyInsertPoints; - LogicalResult bodyGenStatus = success(); - auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { - // Make sure further conversions know about the induction variable. - moduleTranslation.mapValue( - loopOp.getRegion().front().getArgument(loopInfos.size()), iv); - - // Capture the body insertion point for use in nested loops. BodyIP of the - // CanonicalLoopInfo always points to the beginning of the entry block of - // the body. - bodyInsertPoints.push_back(ip); - - if (loopInfos.size() != loopOp.getNumLoops() - 1) - return; - - // Convert the body of the loop, adding lifetime markers to allocations that - // can be sunk into the new block. - builder.restoreIP(ip); - for (auto *alloca : allocasToSink) { - unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; - builder.CreateLifetimeStart(alloca, builder.getInt64(size)); - } - llvm::BasicBlock *cont = - convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, - moduleTranslation, bodyGenStatus); - builder.SetInsertPoint(cont, cont->begin()); - for (auto *alloca : allocasToSink) { - unsigned size = alloca->getAllocatedType()->getPrimitiveSizeInBits() / 8; - builder.CreateLifetimeEnd(alloca, builder.getInt64(size)); - } - }; - - // Delegate actual loop construction to the OpenMP IRBuilder. - // TODO: this currently assumes omp.loop_nest is semantically similar to SCF - // loop, i.e. it has a positive step, uses signed integer semantics. - // Reconsider this code when the nested loop operation clearly supports more - // cases. 
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - for (unsigned i = 0, e = loopOp.getNumLoops(); i < e; ++i) { - llvm::Value *lowerBound = - moduleTranslation.lookupValue(loopOp.getLowerBound()[i]); - llvm::Value *upperBound = - moduleTranslation.lookupValue(loopOp.getUpperBound()[i]); - llvm::Value *step = moduleTranslation.lookupValue(loopOp.getStep()[i]); - - // Make sure loop trip count are emitted in the preheader of the outermost - // loop at the latest so that they are all available for the new collapsed - // loop will be created below. - llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc; - llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP; - if (i != 0) { - loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(), - ompLoc.DL); - computeIP = loopInfos.front()->getPreheaderIP(); - } - loopInfos.push_back(ompBuilder->createCanonicalLoop( - loc, bodyGen, lowerBound, upperBound, step, - /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); - - if (failed(bodyGenStatus)) - return failure(); - } + auto loopNestConversionResult = convertLoopNestHelper( + *loopOp, builder, moduleTranslation, "omp.simd.region"); + if (!loopNestConversionResult) + return failure(); - // Collapse loops. - llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *loopInfo = - ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); + auto [ompLoc, afterIP, loopInfo] = *loopNestConversionResult; llvm::ConstantInt *simdlen = nullptr; if (std::optional simdlenVar = simdOp.getSimdlen()) @@ -2815,6 +2840,13 @@ static LogicalResult convertOmpDistribute( llvm::OpenMPIRBuilder::InsertPointTy *redAllocaIP, SmallVector &reductionInfos) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + // FIXME: This ignores any other nested wrappers (e.g. omp.parallel + + // omp.wsloop, omp.simd). + auto distributeOp = cast(opInst); + auto loopOp = cast(distributeOp.getWrappedLoop()); + + SmallVector loopWrappers; + loopOp.gatherWrappers(loopWrappers); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; // TODO: support error propagation in OpenMPIRBuilder and use it instead of @@ -2830,12 +2862,24 @@ static LogicalResult convertOmpDistribute( // DistributeOp has only one region associated with it. builder.restoreIP(codeGenIP); *redAllocaIP = allocaIP; - mlir::Region ® = opInst.getRegion(0); - auto regionBlock = - convertOmpOpRegions(reg, "omp.distribute.region", builder, - moduleTranslation, bodyGenStatus); - builder.SetInsertPoint(regionBlock->getTerminator()); + if (loopWrappers.size() == 1) { + // Convert a standalone DISTRIBUTE construct. + auto loopNestConversionResult = convertLoopNestHelper( + *loopOp, builder, moduleTranslation, "omp.distribute.region"); + if (!loopNestConversionResult) + return; // TODO: Signal error to abort translation. + + builder.restoreIP(std::get(*loopNestConversionResult)); + } else { + // Convert a DISTRIBUTE leaf as part of a composite construct. 
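+      // Nested wrappers (e.g. omp.parallel + omp.wsloop) are expected to
+      // drive the actual loop lowering in this case, so only the region is
+      // converted here. An illustrative sketch of such a nest:
+      //   omp.distribute {
+      //     omp.parallel {
+      //       omp.wsloop {
+      //         omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      //           omp.yield
+      //         }
+      //         omp.terminator
+      //       }
+      //       omp.terminator
+      //     }
+      //     omp.terminator
+      //   }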
+ mlir::Region ® = distributeOp.getRegion(); + auto *regionBlock = + convertOmpOpRegions(reg, "omp.distribute.region", builder, + moduleTranslation, bodyGenStatus); + + builder.SetInsertPoint(regionBlock->getTerminator()); + } // FIXME(JAN): We need to know if we are inside a distribute and // if there is an inner wsloop reduction, in that case we need to @@ -3132,8 +3176,8 @@ static void initTargetDefaultBounds( } } else if (castOrGetParentOfType(innermostCapturedOmpOp, /*immediateParent=*/true) || - castOrGetParentOfType(innermostCapturedOmpOp, - /*immediateParent=*/true)) { + castOrGetParentOfType(innermostCapturedOmpOp, + /*immediateParent=*/true)) { minTeamsVal = maxTeamsVal = 1; } else { minTeamsVal = maxTeamsVal = -1; @@ -3171,12 +3215,12 @@ static void initTargetDefaultBounds( // Extract MAX_THREADS clause from PARALLEL or set to 1 if it's SIMD. if (innermostCapturedOmpOp) { if (auto parallelOp = - castOrGetParentOfType(innermostCapturedOmpOp, - /*immediateParent=*/true)) { + castOrGetParentOfType(innermostCapturedOmpOp)) { Value numThreadsClause = isTargetDevice ? parallelOp.getNumThreadsVar() : targetOp.getNumThreads(); setMaxValueFromClause(numThreadsClause, maxThreadsVal); - } else if (isa(innermostCapturedOmpOp)) { + } else if (castOrGetParentOfType(innermostCapturedOmpOp, + /*immediateParent=*/true)) { maxThreadsVal = 1; } } @@ -3196,19 +3240,31 @@ static void initTargetDefaultBounds( // for now. int32_t reductionDataSize = 0; if (isGPU && innermostCapturedOmpOp) { - if (auto wsLoopOp = - mlir::dyn_cast(innermostCapturedOmpOp)) { - if (wsLoopOp.getNumReductionVars() > 0) { - assert(wsLoopOp.getNumReductionVars() && - "Only 1 reduction variable currently supported"); - mlir::Value reductionVar = wsLoopOp.getReductionVars()[0]; - DataLayout dl = - DataLayout(innermostCapturedOmpOp->getParentOfType()); - - mlir::Type reductionVarTy = reductionVar.getType(); - uint64_t sizeInBits = dl.getTypeSizeInBits(reductionVarTy); - uint64_t sizeInBytes = sizeInBits / 8; - reductionDataSize = sizeInBytes; + if (auto loopNestOp = + mlir::dyn_cast(innermostCapturedOmpOp)) { + // FIXME: This treats 'DO SIMD' as if it was a 'DO' construct. Reductions + // on other constructs apart from 'DO' aren't considered either. + mlir::omp::WsloopOp wsloopOp = nullptr; + SmallVector wrappers; + loopNestOp.gatherWrappers(wrappers); + for (auto wrapper : wrappers) { + wsloopOp = mlir::dyn_cast(*wrapper); + if (wsloopOp) + break; + } + if (wsloopOp) { + if (wsloopOp.getNumReductionVars() > 0) { + assert(wsloopOp.getNumReductionVars() && + "Only 1 reduction variable currently supported"); + mlir::Value reductionVar = wsloopOp.getReductionVars()[0]; + DataLayout dl = + DataLayout(innermostCapturedOmpOp->getParentOfType()); + + mlir::Type reductionVarTy = reductionVar.getType(); + uint64_t sizeInBits = dl.getTypeSizeInBits(reductionVarTy); + uint64_t sizeInBytes = sizeInBits / 8; + reductionDataSize = sizeInBytes; + } } } } @@ -3455,24 +3511,45 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, return success(); } -// Returns true if the operation is inside a TargetOp or -// is part of a declare target function. 
-static bool isTargetDeviceOp(Operation *op) { - // Assumes no reverse offloading - if (op->getParentOfType()) - return true; +/////////////////////////////////////////////////////////////////////////////// +// CompoundConstructs lowering forward declarations +class OpenMPDialectLLVMIRTranslationInterface; - if (auto parentFn = op->getParentOfType()) - if (auto declareTargetIface = - llvm::dyn_cast( - parentFn.getOperation())) - if (declareTargetIface.isDeclareTarget() && - declareTargetIface.getDeclareTargetDeviceType() != - mlir::omp::DeclareTargetDeviceType::host) - return true; +using ConvertFunctionTy = std::function( + Operation *, llvm::IRBuilderBase &, LLVM::ModuleTranslation &)>; - return false; -} +class ConversionDispatchList { +private: + llvm::SmallVector functions; + +public: + std::pair + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + for (auto riter = functions.rbegin(); riter != functions.rend(); ++riter) { + bool match = false; + LogicalResult result = failure(); + std::tie(match, result) = (*riter)(op, builder, moduleTranslation); + if (match) + return {true, result}; + } + return {false, failure()}; + } + + void pushConversionFunction(ConvertFunctionTy function) { + functions.push_back(function); + } + void popConversionFunction() { functions.pop_back(); } +}; + +static LogicalResult convertOmpDistributeParallelWsloop( + Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel, + omp::WsloopOp wsloop, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + ConversionDispatchList &dispatchList); + +/////////////////////////////////////////////////////////////////////////////// +// Dispatch functions /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). 
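+/// Conversion functions pushed onto the ConversionDispatchList take
+/// precedence over this default lowering: ConversionDispatchList::
+/// convertOperation tries them in reverse order of registration (LIFO) and
+/// returns the result of the first function that reports a match.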
@@ -3596,87 +3673,23 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, }); } -static LogicalResult -convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - return convertHostOrTargetOperation(op, builder, moduleTranslation); -} - -static LogicalResult -convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - if (isa(op)) - return convertOmpTarget(*op, builder, moduleTranslation); - if (isa(op)) - return convertOmpTargetData(op, builder, moduleTranslation); - bool interrupted = - op->walk([&](Operation *oper) { - if (isa(oper)) { - if (failed(convertOmpTarget(*oper, builder, moduleTranslation))) - return WalkResult::interrupt(); - return WalkResult::skip(); - } - if (isa(oper)) { - if (failed(convertOmpTargetData(oper, builder, moduleTranslation))) - return WalkResult::interrupt(); - return WalkResult::skip(); - } - return WalkResult::advance(); - }).wasInterrupted(); - return failure(interrupted); -} - -/////////////////////////////////////////////////////////////////////////////// -// CombinedConstructs lowering forward declarations -class OpenMPDialectLLVMIRTranslationInterface; - -using ConvertFunctionTy = std::function( - Operation *, llvm::IRBuilderBase &, LLVM::ModuleTranslation &)>; - -class ConversionDispatchList { -private: - llvm::SmallVector functions; - -public: - std::pair - convertOperation(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - for (auto riter = functions.rbegin(); riter != functions.rend(); ++riter) { - bool match = false; - LogicalResult result = failure(); - std::tie(match, result) = (*riter)(op, builder, moduleTranslation); - if (match) - return {true, result}; - } - return {false, failure()}; - } - - void pushConversionFunction(ConvertFunctionTy function) { - functions.push_back(function); - } - void popConversionFunction() { functions.pop_back(); } -}; +// Returns true if the operation is inside a TargetOp or is part of a declare +// target function. +static bool isTargetDeviceOp(Operation *op) { + // Assumes no reverse offloading + if (op->getParentOfType()) + return true; -static LogicalResult convertOmpDistributeParallelWsloop( - Operation *op, omp::DistributeOp distribute, omp::ParallelOp parallel, - omp::WsloopOp wsloop, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - ConversionDispatchList &dispatchList); + if (auto parentFn = op->getParentOfType()) + if (auto declareTargetIface = + llvm::dyn_cast( + parentFn.getOperation())) + if (declareTargetIface.isDeclareTarget() && + declareTargetIface.getDeclareTargetDeviceType() != + mlir::omp::DeclareTargetDeviceType::host) + return true; -/////////////////////////////////////////////////////////////////////////////// -// Dispatch functions -static LogicalResult -convertCommonOperation(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - if (ompBuilder->Config.isTargetDevice()) { - if (isTargetDeviceOp(op)) { - return convertTargetDeviceOp(op, builder, moduleTranslation); - } else { - return convertTargetOpsInNest(op, builder, moduleTranslation); - } - } - return convertHostOrTargetOperation(op, builder, moduleTranslation); + return false; } // Returns true if the given block has a single instruction. 
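+// Such single-operation blocks are the shape the matchOpNest and
+// matchOpScanNest helpers below look for when walking nested constructs,
+// e.g. an omp.distribute whose block holds just an omp.parallel (plus the
+// terminator), which in turn wraps an omp.wsloop.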
@@ -3748,10 +3761,9 @@ bool matchOpScanNest(Block &block, FirstOpType &firstOp, } static LogicalResult -convertInternalTargetOp(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - ConversionDispatchList &dispatchList) { - +convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + ConversionDispatchList &dispatchList) { omp::DistributeOp distribute; omp::ParallelOp parallel; omp::WsloopOp wsloop; @@ -3762,12 +3774,12 @@ convertInternalTargetOp(Operation *op, llvm::IRBuilderBase &builder, dispatchList); } - return convertCommonOperation(op, builder, moduleTranslation); + return convertHostOrTargetOperation(op, builder, moduleTranslation); } static LogicalResult -convertTopLevelTargetOp(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { +convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { if (isa(op)) return convertOmpTarget(*op, builder, moduleTranslation); if (isa(op)) @@ -3789,29 +3801,8 @@ convertTopLevelTargetOp(Operation *op, llvm::IRBuilderBase &builder, return failure(interrupted); } -/// Implementation of the dialect interface that converts operations belonging -/// to the OpenMP dialect to LLVM IR. -class OpenMPDialectLLVMIRTranslationInterface - : public LLVMTranslationDialectInterface { -private: - mutable ConversionDispatchList dispatchList; - -public: - using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; - - /// Translates the given operation to LLVM IR using the provided IR builder - /// and saving the state in `moduleTranslation`. - LogicalResult - convertOperation(Operation *op, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) const final; - - /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime - /// calls, or operation amendments - LogicalResult - amendOperation(Operation *op, ArrayRef instructions, - NamedAttribute attribute, - LLVM::ModuleTranslation &moduleTranslation) const final; -}; +/////////////////////////////////////////////////////////////////////////////// +// CompoundConstructs lowering implementations // Implementation converting a nest of operations in a single function. This // just overrides the parallel and wsloop dispatches but does the normal @@ -3856,6 +3847,33 @@ static LogicalResult convertOmpDistributeParallelWsloop( return result; } +/////////////////////////////////////////////////////////////////////////////// +// OpenMPDialectLLVMIRTranslationInterface + +/// Implementation of the dialect interface that converts operations belonging +/// to the OpenMP dialect to LLVM IR. +class OpenMPDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +private: + mutable ConversionDispatchList dispatchList; + +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + /// Translates the given operation to LLVM IR using the provided IR builder + /// and saving the state in `moduleTranslation`. 
+ LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const final; + + /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime + /// calls, or operation amendments + LogicalResult + amendOperation(Operation *op, ArrayRef instructions, + NamedAttribute attribute, + LLVM::ModuleTranslation &moduleTranslation) const final; +}; + LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( Operation *op, ArrayRef instructions, NamedAttribute attribute, @@ -3944,23 +3962,6 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation( return failure(); } -static bool isInternalTargetDeviceOp(Operation *op) { - // Assumes no reverse offloading - if (op->getParentOfType()) - return true; - - if (auto parentFn = op->getParentOfType()) - if (auto declareTargetIface = - llvm::dyn_cast( - parentFn.getOperation())) - if (declareTargetIface.isDeclareTarget() && - declareTargetIface.getDeclareTargetDeviceType() != - mlir::omp::DeclareTargetDeviceType::host) - return true; - - return false; -} - /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( @@ -3978,15 +3979,12 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); if (ompBuilder->Config.isTargetDevice()) { - if (isInternalTargetDeviceOp(op)) { - return convertInternalTargetOp(op, builder, moduleTranslation, - dispatchList); - } else { - return convertTopLevelTargetOp(op, builder, moduleTranslation); - } + if (isTargetDeviceOp(op)) + return convertTargetDeviceOp(op, builder, moduleTranslation, + dispatchList); + return convertTargetOpsInNest(op, builder, moduleTranslation); } - - return convertCommonOperation(op, builder, moduleTranslation); + return convertHostOrTargetOperation(op, builder, moduleTranslation); } void mlir::registerOpenMPDialectTranslation(DialectRegistry ®istry) { diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 3aeb9e70522d52..4c9e09970279a1 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -174,6 +174,7 @@ func.func @loop_nest_block_arg(%val : i32, %ub : i32, %i : index) { ^bb3: omp.yield } + omp.terminator } return } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index c4a88d974fdd61..a6431885784104 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -11,8 +11,8 @@ func.func @unknown_clause() { // ----- func.func @not_wrapper() { + // expected-error@+1 {{op must be a loop wrapper}} omp.distribute { - // expected-error@+1 {{op must take a loop wrapper role if nested inside of 'omp.distribute'}} omp.parallel { %0 = arith.constant 0 : i32 omp.terminator @@ -363,12 +363,16 @@ func.func @omp_simd() -> () { // ----- -func.func @omp_simd_nested_wrapper() -> () { +func.func @omp_simd_nested_wrapper(%lb : index, %ub : index, %step : index) -> () { // expected-error @below {{op must wrap an 'omp.loop_nest' directly}} omp.simd { omp.distribute { + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { + omp.yield + } omp.terminator } + omp.terminator } return } @@ -1917,6 +1921,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { } 
omp.terminator } + omp.terminator } return } @@ -2115,11 +2120,13 @@ func.func @omp_distribute_wrapper() -> () { // ----- -func.func @omp_distribute_nested_wrapper(%data_var : memref) -> () { +func.func @omp_distribute_nested_wrapper(%lb: index, %ub: index, %step: index) -> () { // expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simd'}} "omp.distribute"() ({ "omp.wsloop"() ({ - %0 = arith.constant 0 : i32 + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { + omp.yield + } "omp.terminator"() : () -> () }) : () -> () "omp.terminator"() : () -> () diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index c3fdfb45cea4cd..5453245a0119a2 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -601,6 +601,7 @@ func.func @omp_simd_pretty(%lb : index, %ub : index, %step : index) -> () { omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -616,6 +617,7 @@ func.func @omp_simd_pretty_aligned(%lb : index, %ub : index, %step : index, omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -627,6 +629,7 @@ func.func @omp_simd_pretty_if(%lb : index, %ub : index, %step : index, %if_cond omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -640,6 +643,7 @@ func.func @omp_simd_pretty_nontemporal(%lb : index, %ub : index, %step : index, omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -651,6 +655,7 @@ func.func @omp_simd_pretty_order(%lb : index, %ub : index, %step : index) -> () omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -662,6 +667,7 @@ func.func @omp_simd_pretty_simdlen(%lb : index, %ub : index, %step : index) -> ( omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -673,6 +679,7 @@ func.func @omp_simd_pretty_safelen(%lb : index, %ub : index, %step : index) -> ( omp.loop_nest (%iv): index = (%lb) to (%ub) step (%step) { omp.yield } + omp.terminator } return } @@ -692,30 +699,35 @@ func.func @omp_distribute(%chunk_size : i32, %data_var : memref, %arg0 : i3 omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute dist_schedule_static omp.distribute dist_schedule_static { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute dist_schedule_static chunk_size(%{{.+}} : i32) omp.distribute dist_schedule_static chunk_size(%chunk_size : i32) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute order(concurrent) omp.distribute order(concurrent) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute allocate(%{{.+}} : memref -> %{{.+}} : memref) omp.distribute allocate(%data_var : memref -> %data_var : memref) { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } // CHECK: omp.distribute omp.distribute { @@ -723,7 +735,9 @@ func.func @omp_distribute(%chunk_size : i32, %data_var : memref, %arg0 : i3 omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } + omp.terminator } + omp.terminator } return } @@ -2173,6 +2187,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { // CHECK: 
@@ -2173,6 +2187,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 %testbool = "test.bool"() : () -> (i1)
@@ -2183,6 +2198,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop final(%{{[^)]+}}) {
@@ -2191,6 +2207,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop untied {
@@ -2199,6 +2216,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop mergeable {
@@ -2207,6 +2225,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 %testf32 = "test.f32"() : () -> (!llvm.ptr)
@@ -2217,6 +2236,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) {
@@ -2225,6 +2245,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) {
@@ -2233,6 +2254,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 %testi32 = "test.i32"() : () -> (i32)
@@ -2242,6 +2264,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 %testmemref = "test.memref"() : () -> (memref<i32>)
@@ -2251,6 +2274,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 %testi64 = "test.i64"() : () -> (i64)
@@ -2260,6 +2284,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) {
@@ -2268,6 +2293,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop nogroup {
@@ -2276,6 +2302,7 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
 
 // CHECK: omp.taskloop {
@@ -2285,7 +2312,9 @@ func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () {
     // CHECK: omp.yield
     omp.yield
   }
+  omp.terminator
 }
+  omp.terminator
 }
 
 // CHECK: return
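Every @omp_taskloop hunk above makes the same mechanical change, so the pattern generalizes across clauses: the clause list stays on the wrapper op, the bounds stay on the nested omp.loop_nest, and the wrapper region now ends in an explicit terminator. A minimal sketch of the resulting shape, using the operand-free untied clause from the tests (%lb, %ub and %step assumed to be in-scope i32 values):

// Sketch only: clauses attach to the wrapper, bounds to the loop nest.
omp.taskloop untied {
  omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step) {
    omp.yield
  }
  omp.terminator
}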
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 6d160142e0d3b7..6e95cda52ea8fa 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -699,7 +699,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) {
 // CHECK-LABEL: @simd_simple_multiple
 llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) {
   omp.simd {
-    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
+    omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) {
       %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
       // The form of the emitted IR is controlled by OpenMPIRBuilder and
       // tested there. Just check that the right metadata is added and collapsed
@@ -726,6 +726,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64
       llvm.store %3, %5 : f32, !llvm.ptr
       omp.yield
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -749,6 +750,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l
       llvm.store %3, %5 : f32, !llvm.ptr
       omp.yield
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -769,6 +771,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l
       llvm.store %3, %5 : f32, !llvm.ptr
       omp.yield
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -788,6 +791,7 @@ llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 :
       llvm.store %3, %5 : f32, !llvm.ptr
       omp.yield
     }
+    omp.terminator
   }
   llvm.return
 }
@@ -816,6 +820,7 @@ llvm.func @simd_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr {fi
     llvm.store %arg2, %1 : i32, !llvm.ptr
     omp.yield
   }
+  omp.terminator
 }
 llvm.return
}
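One behavioral detail worth calling out from the @simd_simple_multiple hunk: the collapsed loop now carries an inclusive upper bound. In omp.loop_nest, the inclusive keyword makes the iteration space run up to and including the upper bound, where the default form stops one step short of it. A minimal sketch of the distinction, with all operands assumed to be in-scope i64 values:

// With 'inclusive', iteration continues while iv <= ub;
// without it, the loop stops at iv < ub.
omp.simd {
  omp.loop_nest (%iv) : i64 = (%lb) to (%ub) inclusive step (%step) {
    omp.yield
  }
  omp.terminator
}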